Index: gcc/genautomata.c
===================================================================
--- a/src/gcc/genautomata.c	(revision 146514)
+++ b/src/gcc/genautomata.c	(working copy)
@@ -1,5 +1,5 @@
 /* Pipeline hazard description translator.
-   Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2007, 2008
+   Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2007, 2008, 2009
    Free Software Foundation, Inc.
 
    Written by Vladimir Makarov <vmakarov@redhat.com>
@@ -22,21 +22,25 @@
 
 /* References:
 
-   1. Detecting pipeline structural hazards quickly. T. Proebsting,
+   1. The finite state automaton based pipeline hazard recognizer and
+      instruction scheduler in GCC.  V. Makarov.  Proceedings of GCC
+      summit, 2003.
+
+   2. Detecting pipeline structural hazards quickly. T. Proebsting,
       C. Fraser. Proceedings of ACM SIGPLAN-SIGACT Symposium on
       Principles of Programming Languages, pages 280--286, 1994.
 
       This article is a good start point to understand usage of finite
       state automata for pipeline hazard recognizers.  But I'd
-      recommend the 2nd article for more deep understanding.
+      recommend the 1st and 3rd articles for a deeper understanding.
 
-   2. Efficient Instruction Scheduling Using Finite State Automata:
+   3. Efficient Instruction Scheduling Using Finite State Automata:
       V. Bala and N. Rubin, Proceedings of MICRO-28.  This is the best
       article about usage of finite state automata for pipeline hazard
       recognizers.
 
-   The current implementation is different from the 2nd article in the
-   following:
+   The current implementation is described in the 1st article and it
+   is different from the 3rd article in the following:
 
    1. New operator `|' (alternative) is permitted in functional unit
       reservation which can be treated deterministically and
@@ -463,7 +467,10 @@
      insn.  */
   int insn_num;
   /* The following field value is list of bypasses in which given insn
-     is output insn.  */
+     is output insn.  Bypasses with the same input insn stay one after
+     another in the list in the same order as their occurrences in the
+     description, but the bypass without a guard always stays the last
+     in a row of bypasses with the same input insn.  */
   struct bypass_decl *bypass_list;
 
   /* The following fields are defined by automaton generator.  */
@@ -2367,18 +2374,67 @@
 }
 
 
-/* The function searches for bypass with given IN_INSN_RESERV in given
-   BYPASS_LIST.  */
-static struct bypass_decl *
-find_bypass (struct bypass_decl *bypass_list,
-	     struct insn_reserv_decl *in_insn_reserv)
+/* The function inserts BYPASS in the list of bypasses of the
+   corresponding output insn.  The order of bypasses in the list is
+   described in a comment for member `bypass_list' (see above).  If
+   there is already the same bypass in the list the function reports
+   this and does nothing.  */
+static void
+insert_bypass (struct bypass_decl *bypass)
 {
-  struct bypass_decl *bypass;
-
-  for (bypass = bypass_list; bypass != NULL; bypass = bypass->next)
-    if (bypass->in_insn_reserv == in_insn_reserv)
-      break;
-  return bypass;
+  struct bypass_decl *curr, *last;
+  struct insn_reserv_decl *out_insn_reserv = bypass->out_insn_reserv;
+  struct insn_reserv_decl *in_insn_reserv = bypass->in_insn_reserv;
+  
+  for (curr = out_insn_reserv->bypass_list, last = NULL;
+       curr != NULL;
+       last = curr, curr = curr->next)
+    if (curr->in_insn_reserv == in_insn_reserv)
+      {
+	if ((bypass->bypass_guard_name != NULL
+	     && curr->bypass_guard_name != NULL
+	     && ! strcmp (bypass->bypass_guard_name, curr->bypass_guard_name))
+	    || bypass->bypass_guard_name == curr->bypass_guard_name)
+	  {
+	    if (bypass->bypass_guard_name == NULL)
+	      {
+		if (!w_flag)
+		  error ("the same bypass `%s - %s' is already defined",
+			 bypass->out_insn_name, bypass->in_insn_name);
+		else
+		  warning (0, "the same bypass `%s - %s' is already defined",
+			   bypass->out_insn_name, bypass->in_insn_name);
+	      }
+	    else if (!w_flag)
+	      error ("the same bypass `%s - %s' (guard %s) is already defined",
+		     bypass->out_insn_name, bypass->in_insn_name,
+		     bypass->bypass_guard_name);
+	    else
+	      warning
+		(0, "the same bypass `%s - %s' (guard %s) is already defined",
+		 bypass->out_insn_name, bypass->in_insn_name,
+		 bypass->bypass_guard_name);
+	    return;
+	  }
+	if (curr->bypass_guard_name == NULL)
+	  break;
+	if (curr->next == NULL || curr->next->in_insn_reserv != in_insn_reserv)
+	  {
+	    last = curr;
+	    break;
+	  }
+	  
+      }
+  if (last == NULL)
+    {
+      bypass->next = out_insn_reserv->bypass_list;
+      out_insn_reserv->bypass_list = bypass;
+    }
+  else
+    {
+      bypass->next = last->next;
+      last->next = bypass;
+    }
 }
 
 /* The function processes pipeline description declarations, checks
@@ -2391,7 +2447,6 @@
   decl_t decl_in_table;
   decl_t out_insn_reserv;
   decl_t in_insn_reserv;
-  struct bypass_decl *bypass;
   int automaton_presence;
   int i;
 
@@ -2514,36 +2569,7 @@
 		= DECL_INSN_RESERV (out_insn_reserv);
 	      DECL_BYPASS (decl)->in_insn_reserv
 		= DECL_INSN_RESERV (in_insn_reserv);
-	      bypass
-		= find_bypass (DECL_INSN_RESERV (out_insn_reserv)->bypass_list,
-			       DECL_BYPASS (decl)->in_insn_reserv);
-	      if (bypass != NULL)
-		{
-		  if (DECL_BYPASS (decl)->latency == bypass->latency)
-		    {
-		      if (!w_flag)
-			error
-			  ("the same bypass `%s - %s' is already defined",
-			   DECL_BYPASS (decl)->out_insn_name,
-			   DECL_BYPASS (decl)->in_insn_name);
-		      else
-			warning
-			  (0, "the same bypass `%s - %s' is already defined",
-			   DECL_BYPASS (decl)->out_insn_name,
-			   DECL_BYPASS (decl)->in_insn_name);
-		    }
-		  else
-		    error ("bypass `%s - %s' is already defined",
-			   DECL_BYPASS (decl)->out_insn_name,
-			   DECL_BYPASS (decl)->in_insn_name);
-		}
-	      else
-		{
-		  DECL_BYPASS (decl)->next
-		    = DECL_INSN_RESERV (out_insn_reserv)->bypass_list;
-		  DECL_INSN_RESERV (out_insn_reserv)->bypass_list
-		    = DECL_BYPASS (decl);
-		}
+	      insert_bypass (DECL_BYPASS (decl));
 	    }
 	}
     }
@@ -8159,19 +8185,32 @@
 			    (advance_cycle_insn_decl)->insn_num));
 	    fprintf (output_file, "        case %d:\n",
 		     bypass->in_insn_reserv->insn_num);
-	    if (bypass->bypass_guard_name == NULL)
-	      fprintf (output_file, "          return %d;\n",
-		       bypass->latency);
-	    else
+	    for (;;)
 	      {
-		fprintf (output_file,
-			 "          if (%s (%s, %s))\n",
-			 bypass->bypass_guard_name, INSN_PARAMETER_NAME,
-			 INSN2_PARAMETER_NAME);
-		fprintf (output_file,
-			 "            return %d;\n          break;\n",
-			 bypass->latency);
+		if (bypass->bypass_guard_name == NULL)
+		  {
+		    gcc_assert (bypass->next == NULL
+				|| (bypass->in_insn_reserv
+				    != bypass->next->in_insn_reserv));
+		    fprintf (output_file, "          return %d;\n",
+			     bypass->latency);
+		  }
+		else
+		  {
+		    fprintf (output_file,
+			     "          if (%s (%s, %s))\n",
+			     bypass->bypass_guard_name, INSN_PARAMETER_NAME,
+			     INSN2_PARAMETER_NAME);
+		    fprintf (output_file, "            return %d;\n",
+			     bypass->latency);
+		  }
+		if (bypass->next == NULL
+		    || bypass->in_insn_reserv != bypass->next->in_insn_reserv)
+		  break;
+		bypass = bypass->next;
 	      }
+	    if (bypass->bypass_guard_name != NULL)
+	      fprintf (output_file, "          break;\n");
 	  }
 	fputs ("        }\n      break;\n", output_file);
       }
Index: gcc/rtl.def
===================================================================
--- a/src/gcc/rtl.def	(revision 146514)
+++ b/src/gcc/rtl.def	(working copy)
@@ -1088,7 +1088,11 @@
    guard for the bypass.  The function will get the two insns as
    parameters.  If the function returns zero the bypass will be
    ignored for this case.  Additional guard is necessary to recognize
-   complicated bypasses, e.g. when consumer is load address.  */
+   complicated bypasses, e.g. when consumer is load address.  If there
+   is more than one bypass with the same output and input insns, the
+   chosen bypass is the first bypass with a guard in description whose
+   guard function returns nonzero.  If there is no such bypass, then
+   the bypass without the guard function is chosen.  */
 DEF_RTL_EXPR(DEFINE_BYPASS, "define_bypass", "issS", RTX_EXTRA)
 
 /* (define_automaton string) describes names of automata generated and
Index: gcc/ChangeLog.ix86
===================================================================
--- a/src/gcc/ChangeLog.ix86	(revision 0)
+++ b/src/gcc/ChangeLog.ix86	(revision 0)
@@ -0,0 +1,121 @@
+2009-04-20  H.J. Lu  <hongjiu.lu@intel.com>
+
+	Backport from mainline:
+	2009-04-20  Joey Ye  <joey.ye@intel.com>
+		    Xuepeng Guo <xuepeng.guo@intel.com>
+		    H.J. Lu  <hongjiu.lu@intel.com>
+
+	* config/i386/atom.md: Add bypasses with ix86_dep_by_shift_count.
+
+	* config/i386/i386.c (LEA_SEARCH_THRESHOLD): New macro.
+	(IX86_LEA_PRIORITY): Likewise.
+	(distance_non_agu_define): New function.
+	(distance_agu_use): Likewise.
+	(ix86_lea_for_add_ok): Likewise.
+	(ix86_dep_by_shift_count): Likewise.
+
+	* config/i386/i386.md: Call ix86_lea_for_add_ok to decide we
+	should split for LEA.
+
+	* config/i386/i386-protos.h (ix86_lea_for_add_ok): Declare new
+	function.
+	(ix86_dep_by_shift_count): Likewise.
+
+2009-04-07  H.J. Lu  <hongjiu.lu@intel.com>
+
+	Backport from mainline:
+	2009-04-07  H.J. Lu  <hongjiu.lu@intel.com>
+
+	* doc/invoke.texi: Document Atom support.
+
+2009-04-06  H.J. Lu  <hongjiu.lu@intel.com>
+
+	* config/i386/i386.md: Revert 2 accidental checkins.
+
+2009-04-06  H.J. Lu  <hongjiu.lu@intel.com>
+
+	Backport from mainline:
+	2009-04-06  Joey Ye  <joey.ye@intel.com>
+		    Xuepeng Guo <xuepeng.guo@intel.com>
+		    H.J. Lu  <hongjiu.lu@intel.com>
+
+	Atom pipeline model, tuning and insn selection.
+	* config.gcc (atom): Add atom config options and target.
+
+	* config/i386/atom.md: New.
+
+	* config/i386/i386.c (atom_cost): New cost.
+	(m_ATOM): New macro flag.
+	(initial_ix86_tune_features): Set m_ATOM.
+	(x86_accumulate_outgoing_args): Likewise.
+	(x86_arch_always_fancy_math_387): Likewise.
+	(processor_target): Add Atom cost.
+	(cpu_names): Add Atom cpu name.
+	(override_options): Set Atom ISA.
+	(ix86_issue_rate): New case PROCESSOR_ATOM.
+	(ix86_adjust_cost): Likewise.
+
+	* config/i386/i386.h (TARGET_ATOM): New target macro.
+	(ix86_tune_indices): Add X86_TUNE_OPT_AGU.
+	(TARGET_OPT_AGU): New target option.
+	(target_cpu_default): Add TARGET_CPU_DEFAULT_atom.
+	(processor_type): Add PROCESSOR_ATOM.
+
+	* config/i386/i386.md (cpu): Add new value "atom".
+	(use_carry, movu): New attr.
+	(atom.md): Include atom.md.
+	(adddi3_carry_rex64): Set attr "use_carry".
+	(addqi3_carry): Likewise.
+	(addhi3_carry): Likewise.
+	(addsi3_carry): Likewise.
+	(*addsi3_carry_zext): Likewise.
+	(subdi3_carry_rex64): Likewise.
+	(subqi3_carry): Likewise.
+	(subhi3_carry): Likewise.
+	(subsi3_carry): Likewise.
+	(x86_movdicc_0_m1_rex64): Likewise.
+	(*x86_movdicc_0_m1_se): Likewise.
+	(x86_movsicc_0_m1): Likewise.
+	(*x86_movsicc_0_m1_se): Likewise.
+	(*adddi_1_rex64): Emit add insn as much as possible.
+	(*addsi_1): Likewise.
+	(return_internal): Set atom_unit.
+	(return_internal_long): Likewise.
+	(return_pop_internal): Likewise.
+	(*rcpsf2_sse): Set atom_sse_attr attr.
+	(*sqrt<mode>2_sse): Likewise.
+
+2009-04-02  H.J. Lu  <hongjiu.lu@intel.com>
+
+	Backport from mainline:
+	2009-04-02  H.J. Lu  <hongjiu.lu@intel.com>
+
+	* config/i386/i386.c (ix86_abi): Move initialization to ...
+	(override_options): Here.
+
+2009-03-29  H.J. Lu  <hongjiu.lu@intel.com>
+
+	Backport from mainline:
+	2009-03-29  H.J. Lu  <hongjiu.lu@intel.com>
+
+	* config/i386/i386-protos.h (ix86_agi_dependent): New.
+
+	* config/i386/i386.c (ix86_agi_dependent): Rewrite.
+	(ix86_adjust_cost): Updated.
+
+2009-03-27  H.J. Lu  <hongjiu.lu@intel.com>
+
+	Backport from mainline:
+	2009-03-27  Vladimir Makarov  <vmakarov@redhat.com>
+
+	* genautomata.c: Add a new year to the copyright.  Add a new
+	reference.
+	(struct insn_reserv_decl): Add comments for member bypass_list.
+	(find_bypass): Remove.
+	(insert_bypass): New.
+	(process_decls): Use insert_bypass.
+	(output_internal_insn_latency_func): Output all bypasses with the
+	same input insn in one switch case.
+
+	* rtl.def (define_bypass): Describe bypass choice.
+	* doc/md.texi (define_bypass): Ditto.
Index: gcc/config.gcc
===================================================================
--- a/src/gcc/config.gcc	(revision 146514)
+++ b/src/gcc/config.gcc	(working copy)
@@ -1088,7 +1088,7 @@
 			tmake_file="${tmake_file} i386/t-linux64"
 			need_64bit_hwint=yes
 			case X"${with_cpu}" in
-			Xgeneric|Xcore2|Xnocona|Xx86-64|Xamdfam10|Xbarcelona|Xk8|Xopteron|Xathlon64|Xathlon-fx)
+			Xgeneric|Xatom|Xcore2|Xnocona|Xx86-64|Xamdfam10|Xbarcelona|Xk8|Xopteron|Xathlon64|Xathlon-fx)
 				;;
 			X)
 				if test x$with_cpu_64 = x; then
@@ -1097,7 +1097,7 @@
 				;;
 			*)
 				echo "Unsupported CPU used in --with-cpu=$with_cpu, supported values:" 1>&2
-				echo "generic core2 nocona x86-64 amdfam10 barcelona k8 opteron athlon64 athlon-fx" 1>&2
+				echo "generic atom core2 nocona x86-64 amdfam10 barcelona k8 opteron athlon64 athlon-fx" 1>&2
 				exit 1
 				;;
 			esac
@@ -1202,7 +1202,7 @@
 		# libgcc/configure.ac instead.
 		need_64bit_hwint=yes
 		case X"${with_cpu}" in
-		Xgeneric|Xcore2|Xnocona|Xx86-64|Xamdfam10|Xbarcelona|Xk8|Xopteron|Xathlon64|Xathlon-fx)
+		Xgeneric|Xatom|Xcore2|Xnocona|Xx86-64|Xamdfam10|Xbarcelona|Xk8|Xopteron|Xathlon64|Xathlon-fx)
 			;;
 		X)
 			if test x$with_cpu_64 = x; then
@@ -1211,7 +1211,7 @@
 			;;
 		*)
 			echo "Unsupported CPU used in --with-cpu=$with_cpu, supported values:" 1>&2
-			echo "generic core2 nocona x86-64 amdfam10 barcelona k8 opteron athlon64 athlon-fx" 1>&2
+			echo "generic atom core2 nocona x86-64 amdfam10 barcelona k8 opteron athlon64 athlon-fx" 1>&2
 			exit 1
 			;;
 		esac
@@ -2805,7 +2805,7 @@
 				esac
 				# OK
 				;;
-			"" | amdfam10 | barcelona | k8 | opteron | athlon64 | athlon-fx | nocona | core2 | generic)
+			"" | amdfam10 | barcelona | k8 | opteron | athlon64 | athlon-fx | nocona | core2 | atom | generic)
 				# OK
 				;;
 			*)
Index: gcc/config/i386/i386.h
===================================================================
--- a/src/gcc/config/i386/i386.h	(revision 146514)
+++ b/src/gcc/config/i386/i386.h	(working copy)
@@ -236,6 +236,7 @@
 #define TARGET_GENERIC64 (ix86_tune == PROCESSOR_GENERIC64)
 #define TARGET_GENERIC (TARGET_GENERIC32 || TARGET_GENERIC64)
 #define TARGET_AMDFAM10 (ix86_tune == PROCESSOR_AMDFAM10)
+#define TARGET_ATOM (ix86_tune == PROCESSOR_ATOM)
 
 /* Feature tests against the various tunings.  */
 enum ix86_tune_indices {
@@ -300,6 +301,7 @@
   X86_TUNE_USE_VECTOR_FP_CONVERTS,
   X86_TUNE_USE_VECTOR_CONVERTS,
   X86_TUNE_FUSE_CMP_AND_BRANCH,
+  X86_TUNE_OPT_AGU,
 
   X86_TUNE_LAST
 };
@@ -387,6 +389,7 @@
 	ix86_tune_features[X86_TUNE_USE_VECTOR_CONVERTS]
 #define TARGET_FUSE_CMP_AND_BRANCH \
 	ix86_tune_features[X86_TUNE_FUSE_CMP_AND_BRANCH]
+#define TARGET_OPT_AGU ix86_tune_features[X86_TUNE_OPT_AGU]
 
 /* Feature tests against the various architecture variations.  */
 enum ix86_arch_indices {
@@ -569,6 +572,7 @@
   TARGET_CPU_DEFAULT_prescott,
   TARGET_CPU_DEFAULT_nocona,
   TARGET_CPU_DEFAULT_core2,
+  TARGET_CPU_DEFAULT_atom,
 
   TARGET_CPU_DEFAULT_geode,
   TARGET_CPU_DEFAULT_k6,
@@ -2260,6 +2264,7 @@
   PROCESSOR_GENERIC32,
   PROCESSOR_GENERIC64,
   PROCESSOR_AMDFAM10,
+  PROCESSOR_ATOM,
   PROCESSOR_max
 };
 
Index: gcc/config/i386/i386.md
===================================================================
--- a/src/gcc/config/i386/i386.md	(revision 146514)
+++ b/src/gcc/config/i386/i386.md	(working copy)
@@ -316,7 +316,7 @@
 
 
 ;; Processor type.
-(define_attr "cpu" "none,pentium,pentiumpro,geode,k6,athlon,k8,core2,
+(define_attr "cpu" "none,pentium,pentiumpro,geode,k6,athlon,k8,core2,atom,
 		    generic64,amdfam10"
   (const (symbol_ref "ix86_schedule")))
 
@@ -612,6 +612,12 @@
 (define_attr "i387_cw" "trunc,floor,ceil,mask_pm,uninitialized,any"
   (const_string "any"))
 
+;; Define attribute to classify add/sub insns that consumes carry flag (CF)
+(define_attr "use_carry" "0,1" (const_string "0"))
+
+;; Define attribute to indicate unaligned ssemov insns
+(define_attr "movu" "0,1" (const_string "0"))
+
 ;; Describe a user's asm statement.
 (define_asm_attributes
   [(set_attr "length" "128")
@@ -727,6 +733,7 @@
 (include "k6.md")
 (include "athlon.md")
 (include "geode.md")
+(include "atom.md")
 
 
 ;; Operand and operator predicates and constraints
@@ -5790,6 +5797,7 @@
   "TARGET_64BIT && ix86_binary_operator_ok (PLUS, DImode, operands)"
   "adc{q}\t{%2, %0|%0, %2}"
   [(set_attr "type" "alu")
+   (set_attr "use_carry" "1")
    (set_attr "pent_pair" "pu")
    (set_attr "mode" "DI")])
 
@@ -5864,6 +5872,7 @@
   "ix86_binary_operator_ok (PLUS, QImode, operands)"
   "adc{b}\t{%2, %0|%0, %2}"
   [(set_attr "type" "alu")
+   (set_attr "use_carry" "1")
    (set_attr "pent_pair" "pu")
    (set_attr "mode" "QI")])
 
@@ -5876,6 +5885,7 @@
   "ix86_binary_operator_ok (PLUS, HImode, operands)"
   "adc{w}\t{%2, %0|%0, %2}"
   [(set_attr "type" "alu")
+   (set_attr "use_carry" "1")
    (set_attr "pent_pair" "pu")
    (set_attr "mode" "HI")])
 
@@ -5888,6 +5898,7 @@
   "ix86_binary_operator_ok (PLUS, SImode, operands)"
   "adc{l}\t{%2, %0|%0, %2}"
   [(set_attr "type" "alu")
+   (set_attr "use_carry" "1")
    (set_attr "pent_pair" "pu")
    (set_attr "mode" "SI")])
 
@@ -5901,6 +5912,7 @@
   "TARGET_64BIT && ix86_binary_operator_ok (PLUS, SImode, operands)"
   "adc{l}\t{%2, %k0|%k0, %2}"
   [(set_attr "type" "alu")
+   (set_attr "use_carry" "1")
    (set_attr "pent_pair" "pu")
    (set_attr "mode" "SI")])
 
@@ -6130,9 +6142,9 @@
    (set_attr "mode" "SI")])
 
 (define_insn "*adddi_1_rex64"
-  [(set (match_operand:DI 0 "nonimmediate_operand" "=r,rm,r")
-	(plus:DI (match_operand:DI 1 "nonimmediate_operand" "%0,0,r")
-		 (match_operand:DI 2 "x86_64_general_operand" "rme,re,le")))
+  [(set (match_operand:DI 0 "nonimmediate_operand" "=r,rm,r,r")
+	(plus:DI (match_operand:DI 1 "nonimmediate_operand" "%0,0,r,r")
+		 (match_operand:DI 2 "x86_64_general_operand" "rme,re,0,le")))
    (clobber (reg:CC FLAGS_REG))]
   "TARGET_64BIT && ix86_binary_operator_ok (PLUS, DImode, operands)"
 {
@@ -6153,6 +6165,10 @@
 	}
 
     default:
+      /* Use add as much as possible to replace lea for AGU optimization. */
+      if (which_alternative == 2 && TARGET_OPT_AGU)
+        return "add{q}\t{%1, %0|%0, %1}";
+        
       gcc_assert (rtx_equal_p (operands[0], operands[1]));
 
       /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'.
@@ -6171,8 +6187,11 @@
     }
 }
   [(set (attr "type")
-     (cond [(eq_attr "alternative" "2")
+     (cond [(and (eq_attr "alternative" "2") 
+                 (eq (symbol_ref "TARGET_OPT_AGU") (const_int 0)))
 	      (const_string "lea")
+            (eq_attr "alternative" "3")
+              (const_string "lea")
 	    ; Current assemblers are broken and do not allow @GOTOFF in
 	    ; ought but a memory context.
 	    (match_operand:DI 2 "pic_symbolic_operand" "")
@@ -6189,8 +6208,8 @@
 	(plus:DI (match_operand:DI 1 "register_operand" "")
 		 (match_operand:DI 2 "x86_64_nonmemory_operand" "")))
    (clobber (reg:CC FLAGS_REG))]
-  "TARGET_64BIT && reload_completed
-   && true_regnum (operands[0]) != true_regnum (operands[1])"
+  "TARGET_64BIT && reload_completed 
+   && ix86_lea_for_add_ok (PLUS, insn, operands)"
   [(set (match_dup 0)
 	(plus:DI (match_dup 1)
 		 (match_dup 2)))]
@@ -6394,9 +6413,9 @@
 
 
 (define_insn "*addsi_1"
-  [(set (match_operand:SI 0 "nonimmediate_operand" "=r,rm,r")
-	(plus:SI (match_operand:SI 1 "nonimmediate_operand" "%0,0,r")
-		 (match_operand:SI 2 "general_operand" "g,ri,li")))
+  [(set (match_operand:SI 0 "nonimmediate_operand" "=r,rm,r,r")
+	(plus:SI (match_operand:SI 1 "nonimmediate_operand" "%0,0,r,r")
+		 (match_operand:SI 2 "general_operand" "g,ri,0,li")))
    (clobber (reg:CC FLAGS_REG))]
   "ix86_binary_operator_ok (PLUS, SImode, operands)"
 {
@@ -6417,6 +6436,10 @@
 	}
 
     default:
+      /* Use add as much as possible to replace lea for AGU optimization. */
+      if (which_alternative == 2 && TARGET_OPT_AGU)
+        return "add{l}\t{%1, %0|%0, %1}";
+
       gcc_assert (rtx_equal_p (operands[0], operands[1]));
 
       /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'.
@@ -6433,7 +6456,10 @@
     }
 }
   [(set (attr "type")
-     (cond [(eq_attr "alternative" "2")
+     (cond [(and (eq_attr "alternative" "2") 
+                 (eq (symbol_ref "TARGET_OPT_AGU") (const_int 0)))
+               (const_string "lea")
+            (eq_attr "alternative" "3")
 	      (const_string "lea")
 	    ; Current assemblers are broken and do not allow @GOTOFF in
 	    ; ought but a memory context.
@@ -6451,8 +6477,7 @@
 	(plus (match_operand 1 "register_operand" "")
               (match_operand 2 "nonmemory_operand" "")))
    (clobber (reg:CC FLAGS_REG))]
-  "reload_completed
-   && true_regnum (operands[0]) != true_regnum (operands[1])"
+  "reload_completed && ix86_lea_for_add_ok (PLUS, insn, operands)" 
   [(const_int 0)]
 {
   rtx pat;
@@ -7553,6 +7578,7 @@
   "TARGET_64BIT && ix86_binary_operator_ok (MINUS, DImode, operands)"
   "sbb{q}\t{%2, %0|%0, %2}"
   [(set_attr "type" "alu")
+   (set_attr "use_carry" "1")
    (set_attr "pent_pair" "pu")
    (set_attr "mode" "DI")])
 
@@ -7601,6 +7627,7 @@
   "ix86_binary_operator_ok (MINUS, QImode, operands)"
   "sbb{b}\t{%2, %0|%0, %2}"
   [(set_attr "type" "alu")
+   (set_attr "use_carry" "1")
    (set_attr "pent_pair" "pu")
    (set_attr "mode" "QI")])
 
@@ -7613,6 +7640,7 @@
   "ix86_binary_operator_ok (MINUS, HImode, operands)"
   "sbb{w}\t{%2, %0|%0, %2}"
   [(set_attr "type" "alu")
+   (set_attr "use_carry" "1")
    (set_attr "pent_pair" "pu")
    (set_attr "mode" "HI")])
 
@@ -7625,6 +7653,7 @@
   "ix86_binary_operator_ok (MINUS, SImode, operands)"
   "sbb{l}\t{%2, %0|%0, %2}"
   [(set_attr "type" "alu")
+   (set_attr "use_carry" "1")
    (set_attr "pent_pair" "pu")
    (set_attr "mode" "SI")])
 
@@ -15244,6 +15273,7 @@
   "reload_completed"
   "ret"
   [(set_attr "length" "1")
+   (set_attr "atom_unit" "jeu")
    (set_attr "length_immediate" "0")
    (set_attr "modrm" "0")])
 
@@ -15256,6 +15286,7 @@
   "reload_completed"
   "rep\;ret"
   [(set_attr "length" "1")
+   (set_attr "atom_unit" "jeu")
    (set_attr "length_immediate" "0")
    (set_attr "prefix_rep" "1")
    (set_attr "modrm" "0")])
@@ -15266,6 +15297,7 @@
   "reload_completed"
   "ret\t%0"
   [(set_attr "length" "3")
+   (set_attr "atom_unit" "jeu")
    (set_attr "length_immediate" "2")
    (set_attr "modrm" "0")])
 
@@ -16387,6 +16419,7 @@
   "TARGET_SSE_MATH"
   "%vrcpss\t{%1, %d0|%d0, %1}"
   [(set_attr "type" "sse")
+   (set_attr "atom_sse_attr" "rcp")
    (set_attr "prefix" "maybe_vex")
    (set_attr "mode" "SF")])
 
@@ -16738,6 +16771,7 @@
   "TARGET_SSE_MATH"
   "%vrsqrtss\t{%1, %d0|%d0, %1}"
   [(set_attr "type" "sse")
+   (set_attr "atom_sse_attr" "rcp")
    (set_attr "prefix" "maybe_vex")
    (set_attr "mode" "SF")])
 
@@ -16758,6 +16792,7 @@
   "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH"
   "%vsqrts<ssemodefsuffix>\t{%1, %d0|%d0, %1}"
   [(set_attr "type" "sse")
+   (set_attr "atom_sse_attr" "sqrt")
    (set_attr "prefix" "maybe_vex")
    (set_attr "mode" "<MODE>")
    (set_attr "athlon_decode" "*")
@@ -19811,6 +19846,7 @@
   ; Since we don't have the proper number of operands for an alu insn,
   ; fill in all the blanks.
   [(set_attr "type" "alu")
+   (set_attr "use_carry" "1")
    (set_attr "pent_pair" "pu")
    (set_attr "memory" "none")
    (set_attr "imm_disp" "false")
@@ -19826,6 +19862,7 @@
   ""
   "sbb{q}\t%0, %0"
   [(set_attr "type" "alu")
+   (set_attr "use_carry" "1")
    (set_attr "pent_pair" "pu")
    (set_attr "memory" "none")
    (set_attr "imm_disp" "false")
@@ -19869,6 +19906,7 @@
   ; Since we don't have the proper number of operands for an alu insn,
   ; fill in all the blanks.
   [(set_attr "type" "alu")
+   (set_attr "use_carry" "1")
    (set_attr "pent_pair" "pu")
    (set_attr "memory" "none")
    (set_attr "imm_disp" "false")
@@ -19884,6 +19922,7 @@
   ""
   "sbb{l}\t%0, %0"
   [(set_attr "type" "alu")
+   (set_attr "use_carry" "1")
    (set_attr "pent_pair" "pu")
    (set_attr "memory" "none")
    (set_attr "imm_disp" "false")
@@ -20216,7 +20255,8 @@
     }
 }
   [(set (attr "type")
-	(cond [(eq_attr "alternative" "0")
+	(cond [(and (eq_attr "alternative" "0") 
+	            (eq (symbol_ref "TARGET_OPT_AGU") (const_int 0)))
 		 (const_string "alu")
 	       (match_operand:SI 2 "const0_operand" "")
 		 (const_string "imov")
@@ -20259,7 +20299,8 @@
     }
 }
   [(set (attr "type")
-	(cond [(eq_attr "alternative" "0")
+	(cond [(and (eq_attr "alternative" "0")
+	            (eq (symbol_ref "TARGET_OPT_AGU") (const_int 0)))
 		 (const_string "alu")
 	       (match_operand:DI 2 "const0_operand" "")
 		 (const_string "imov")
@@ -21751,6 +21792,7 @@
   return patterns[locality];
 }
   [(set_attr "type" "sse")
+   (set_attr "atom_sse_attr" "prefetch")
    (set_attr "memory" "none")])
 
 (define_insn "*prefetch_sse_rex"
@@ -21769,6 +21811,7 @@
   return patterns[locality];
 }
   [(set_attr "type" "sse")
+   (set_attr "atom_sse_attr" "prefetch")
    (set_attr "memory" "none")])
 
 (define_insn "*prefetch_3dnow"
Index: gcc/config/i386/atom.md
===================================================================
--- a/src/gcc/config/i386/atom.md	(revision 0)
+++ b/src/gcc/config/i386/atom.md	(revision 0)
@@ -0,0 +1,795 @@
+;; Atom Scheduling
+;; Copyright (C) 2009 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3.  If not see
+;; <http://www.gnu.org/licenses/>.
+;;
+;; Atom is an in-order core with two integer pipelines.
+
+
+(define_attr "atom_unit" "sishuf,simul,jeu,complex,other" 
+  (const_string "other"))
+
+(define_attr "atom_sse_attr" "rcp,movdup,lfence,fence,prefetch,sqrt,mxcsr,other"
+  (const_string "other"))
+
+(define_automaton "atom")
+
+;;  Atom has two ports: port 0 and port 1 connecting to all execution units
+(define_cpu_unit "atom-port-0,atom-port-1" "atom")
+
+;;  EU: Execution Unit
+;;  Atom EUs are connected by port 0 or port 1. 
+
+(define_cpu_unit "atom-eu-0, atom-eu-1,
+                  atom-imul-1, atom-imul-2, atom-imul-3, atom-imul-4"
+                  "atom")
+
+;; Some EUs have duplicated copies and can be accessed via either
+;; port 0 or port 1
+;; (define_reservation "atom-port-either" "(atom-port-0 | atom-port-1)")
+
+;;; Some instructions are dual-pipe execution, needing both ports
+;;; Complex multi-op macro-instructions need both ports and all EUs
+(define_reservation "atom-port-dual" "(atom-port-0 + atom-port-1)")
+(define_reservation "atom-all-eu" "(atom-eu-0 + atom-eu-1 + 
+                                    atom-imul-1 + atom-imul-2 + atom-imul-3 +
+                                    atom-imul-4)")
+
+;;; Most simple instructions have 1 cycle latency.  Some of them
+;;; issue in port 0, some in port 1 and some in either port.
+(define_reservation "atom-simple-0" "(atom-port-0 + atom-eu-0)")
+(define_reservation "atom-simple-1" "(atom-port-1 + atom-eu-1)")
+(define_reservation "atom-simple-either" "(atom-simple-0 | atom-simple-1)")
+
+;;; Some insns issue in port 0 with 3 cycle latency and 1 cycle tput
+(define_reservation "atom-eu-0-3-1" "(atom-port-0 + atom-eu-0, nothing*2)")
+
+;;; fmul insn can have 4 or 5 cycles latency
+(define_reservation "atom-fmul-5c" "(atom-port-0 + atom-eu-0), nothing*4")
+(define_reservation "atom-fmul-4c" "(atom-port-0 + atom-eu-0), nothing*3")
+
+;;; fadd can have 5 cycles latency depending on instruction forms
+(define_reservation "atom-fadd-5c" "(atom-port-1 + atom-eu-1), nothing*5")
+
+;;; imul insn has 5 cycles latency
+(define_reservation "atom-imul-32" 
+                    "atom-imul-1, atom-imul-2, atom-imul-3, atom-imul-4, 
+                     atom-port-0")
+;;; imul instruction excludes other non-FP instructions.
+(exclusion_set "atom-eu-0, atom-eu-1" 
+               "atom-imul-1, atom-imul-2, atom-imul-3, atom-imul-4")
+
+;;; dual-execution instructions can have 1,2,4,5 cycles latency depending
+;;; on instruction forms
+(define_reservation "atom-dual-1c" "(atom-port-dual + atom-eu-0 + atom-eu-1)")
+(define_reservation "atom-dual-2c"
+                    "(atom-port-dual + atom-eu-0 + atom-eu-1, nothing)")
+(define_reservation "atom-dual-5c"
+                    "(atom-port-dual + atom-eu-0 + atom-eu-1, nothing*4)")
+
+;;; Complex macro-instruction has variants of latency, and uses both ports.
+(define_reservation "atom-complex" "(atom-port-dual + atom-all-eu)")
+
+(define_insn_reservation  "atom_other" 9
+  (and (eq_attr "cpu" "atom")
+       (and (eq_attr "type" "other")
+            (eq_attr "atom_unit" "!jeu")))
+  "atom-complex, atom-all-eu*8")
+
+;; return has type "other" with atom_unit "jeu"
+(define_insn_reservation  "atom_other_2" 1
+  (and (eq_attr "cpu" "atom")
+       (and (eq_attr "type" "other")
+            (eq_attr "atom_unit" "jeu")))
+  "atom-dual-1c")
+
+(define_insn_reservation  "atom_multi" 9
+  (and (eq_attr "cpu" "atom")
+       (eq_attr "type" "multi"))
+  "atom-complex, atom-all-eu*8")
+
+;; Normal alu insns without carry
+(define_insn_reservation  "atom_alu" 1
+  (and (eq_attr "cpu" "atom")
+       (and (eq_attr "type" "alu")
+            (and (eq_attr "memory" "none")
+                 (eq_attr "use_carry" "0"))))
+  "atom-simple-either")
+
+;; Normal alu insns without carry, with a memory operand
+(define_insn_reservation  "atom_alu_mem" 1
+  (and (eq_attr "cpu" "atom")
+       (and (eq_attr "type" "alu")
+            (and (eq_attr "memory" "!none")
+                 (eq_attr "use_carry" "0"))))
+  "atom-simple-either")
+
+;; Alu insn consuming CF, such as adc/sbb
+(define_insn_reservation  "atom_alu_carry" 1
+  (and (eq_attr "cpu" "atom")
+       (and (eq_attr "type" "alu")
+            (and (eq_attr "memory" "none")
+                 (eq_attr "use_carry" "1"))))
+  "atom-simple-either")
+
+;; Alu insn consuming CF, such as adc/sbb, with a memory operand
+(define_insn_reservation  "atom_alu_carry_mem" 1
+  (and (eq_attr "cpu" "atom")
+       (and (eq_attr "type" "alu")
+            (and (eq_attr "memory" "!none")
+                (eq_attr "use_carry" "1"))))
+  "atom-simple-either")
+
+(define_insn_reservation  "atom_alu1" 1
+  (and (eq_attr "cpu" "atom")
+       (and (eq_attr "type" "alu1")
+            (eq_attr "memory" "none")))
+  "atom-simple-either")
+
+(define_insn_reservation  "atom_alu1_mem" 1
+  (and (eq_attr "cpu" "atom")
+       (and (eq_attr "type" "alu1")
+            (eq_attr "memory" "!none")))
+  "atom-simple-either")
+
+(define_insn_reservation  "atom_negnot" 1
+  (and (eq_attr "cpu" "atom")
+       (and (eq_attr "type" "negnot")
+            (eq_attr "memory" "none")))
+  "atom-simple-either")
+
+(define_insn_reservation  "atom_negnot_mem" 1
+  (and (eq_attr "cpu" "atom")
+       (and (eq_attr "type" "negnot")
+            (eq_attr "memory" "!none")))
+  "atom-simple-either")
+
+(define_insn_reservation  "atom_imov" 1
+  (and (eq_attr "cpu" "atom")
+       (and (eq_attr "type" "imov")
+            (eq_attr "memory" "none")))
+  "atom-simple-either")
+
+(define_insn_reservation  "atom_imov_mem" 1
+  (and (eq_attr "cpu" "atom")
+       (and (eq_attr "type" "imov")
+            (eq_attr "memory" "!none")))
+  "atom-simple-either")
+
+;; 16<-16, 32<-32
+(define_insn_reservation  "atom_imovx" 1
+  (and (eq_attr "cpu" "atom")
+       (and (eq_attr "type" "imovx")
+            (and (eq_attr "memory" "none")
+                 (ior (and (match_operand:HI 0 "register_operand")
+                           (match_operand:HI 1 "general_operand"))
+                      (and (match_operand:SI 0 "register_operand")
+                           (match_operand:SI 1 "general_operand"))))))
+  "atom-simple-either")
+
+;; 16<-16, 32<-32, mem
+(define_insn_reservation  "atom_imovx_mem" 1
+  (and (eq_attr "cpu" "atom")
+       (and (eq_attr "type" "imovx")
+            (and (eq_attr "memory" "!none")
+                 (ior (and (match_operand:HI 0 "register_operand")
+                           (match_operand:HI 1 "general_operand"))
+                      (and (match_operand:SI 0 "register_operand")
+                           (match_operand:SI 1 "general_operand"))))))
+  "atom-simple-either")
+
+;; 32<-16, 32<-8, 64<-16, 64<-8, 64<-32, 8<-8
+(define_insn_reservation  "atom_imovx_2" 1
+  (and (eq_attr "cpu" "atom")
+       (and (eq_attr "type" "imovx")
+            (and (eq_attr "memory" "none")
+                 (ior (match_operand:QI 0 "register_operand")
+                      (ior (and (match_operand:SI 0 "register_operand")
+                                (not (match_operand:SI 1 "general_operand")))
+                           (match_operand:DI 0 "register_operand"))))))
+  "atom-simple-0")
+
+;; 32<-16, 32<-8, 64<-16, 64<-8, 64<-32, 8<-8, mem
+(define_insn_reservation  "atom_imovx_2_mem" 1
+  (and (eq_attr "cpu" "atom")
+       (and (eq_attr "type" "imovx")
+            (and (eq_attr "memory" "!none")
+                 (ior (match_operand:QI 0 "register_operand")
+                      (ior (and (match_operand:SI 0 "register_operand")
+                                (not (match_operand:SI 1 "general_operand")))
+                           (match_operand:DI 0 "register_operand"))))))
+  "atom-simple-0")
+
+;; 16<-8
+(define_insn_reservation  "atom_imovx_3" 3
+  (and (eq_attr "cpu" "atom")
+       (and (eq_attr "type" "imovx")
+            (and (match_operand:HI 0 "register_operand")
+                 (match_operand:QI 1 "general_operand"))))
+  "atom-complex, atom-all-eu*2")
+
+(define_insn_reservation  "atom_lea" 1
+  (and (eq_attr "cpu" "atom")
+       (and (eq_attr "type" "lea")
+            (eq_attr "mode" "!HI")))
+  "atom-simple-either")
+
+;; lea 16bit address is complex insn
+(define_insn_reservation  "atom_lea_2" 2
+  (and (eq_attr "cpu" "atom")
+       (and (eq_attr "type" "lea")
+            (eq_attr "mode" "HI")))
+  "atom-complex, atom-all-eu")
+
+(define_insn_reservation  "atom_incdec" 1
+  (and (eq_attr "cpu" "atom")
+       (and (eq_attr "type" "incdec")
+            (eq_attr "memory" "none")))
+  "atom-simple-either")
+
+(define_insn_reservation  "atom_incdec_mem" 1
+  (and (eq_attr "cpu" "atom")
+       (and (eq_attr "type" "incdec")
+            (eq_attr "memory" "!none")))
+  "atom-simple-either")
+
+;; simple shift instruction uses the SHIFT eu, no memory operand
+(define_insn_reservation  "atom_ishift" 1
+  (and (eq_attr "cpu" "atom")
+       (and (eq_attr "type" "ishift")
+            (and (eq_attr "memory" "none") (eq_attr "prefix_0f" "0"))))
+  "atom-simple-0")
+
+;; simple shift instruction uses the SHIFT eu, with a memory operand
+(define_insn_reservation  "atom_ishift_mem" 1
+  (and (eq_attr "cpu" "atom")
+       (and (eq_attr "type" "ishift")
+            (and (eq_attr "memory" "!none") (eq_attr "prefix_0f" "0"))))
+  "atom-simple-0")
+
+;; DF shift (prefixed with 0f) is complex insn with latency of 7 cycles
+(define_insn_reservation  "atom_ishift_3" 7
+  (and (eq_attr "cpu" "atom")
+       (and (eq_attr "type" "ishift")
+            (eq_attr "prefix_0f" "1")))
+  "atom-complex, atom-all-eu*6")
+
+(define_insn_reservation  "atom_ishift1" 1
+  (and (eq_attr "cpu" "atom")
+       (and (eq_attr "type" "ishift1")
+            (eq_attr "memory" "none")))
+  "atom-simple-0")
+
+(define_insn_reservation  "atom_ishift1_mem" 1
+  (and (eq_attr "cpu" "atom")
+       (and (eq_attr "type" "ishift1")
+            (eq_attr "memory" "!none")))
+  "atom-simple-0")
+
+(define_insn_reservation  "atom_rotate" 1
+  (and (eq_attr "cpu" "atom")
+       (and (eq_attr "type" "rotate")
+            (eq_attr "memory" "none")))
+  "atom-simple-0")
+
+(define_insn_reservation  "atom_rotate_mem" 1
+  (and (eq_attr "cpu" "atom")
+       (and (eq_attr "type" "rotate")
+            (eq_attr "memory" "!none")))
+  "atom-simple-0")
+
+(define_insn_reservation  "atom_rotate1" 1
+  (and (eq_attr "cpu" "atom")
+       (and (eq_attr "type" "rotate1")
+            (eq_attr "memory" "none")))
+  "atom-simple-0")
+
+(define_insn_reservation  "atom_rotate1_mem" 1
+  (and (eq_attr "cpu" "atom")
+       (and (eq_attr "type" "rotate1")
+            (eq_attr "memory" "!none")))
+  "atom-simple-0")
+
+(define_insn_reservation  "atom_imul" 5
+  (and (eq_attr "cpu" "atom")
+       (and (eq_attr "type" "imul")
+            (and (eq_attr "memory" "none") (eq_attr "mode" "SI"))))
+  "atom-imul-32")
+
+(define_insn_reservation  "atom_imul_mem" 5
+  (and (eq_attr "cpu" "atom")
+       (and (eq_attr "type" "imul")
+            (and (eq_attr "memory" "!none") (eq_attr "mode" "SI"))))
+  "atom-imul-32")
+
+;; latency set to 10 as common 64x64 imul
+(define_insn_reservation  "atom_imul_3" 10
+  (and (eq_attr "cpu" "atom")
+       (and (eq_attr "type" "imul")
+            (eq_attr "mode" "!SI")))
+  "atom-complex, atom-all-eu*9")
+
+(define_insn_reservation  "atom_idiv" 65
+  (and (eq_attr "cpu" "atom")
+       (eq_attr "type" "idiv"))
+  "atom-complex, atom-all-eu*32, nothing*32")
+
+(define_insn_reservation  "atom_icmp" 1
+  (and (eq_attr "cpu" "atom")
+       (and (eq_attr "type" "icmp")
+            (eq_attr "memory" "none")))
+  "atom-simple-either")
+
+(define_insn_reservation  "atom_icmp_mem" 1
+  (and (eq_attr "cpu" "atom")
+       (and (eq_attr "type" "icmp")
+            (eq_attr "memory" "!none")))
+  "atom-simple-either")
+
+(define_insn_reservation  "atom_test" 1
+  (and (eq_attr "cpu" "atom")
+       (and (eq_attr "type" "test")
+            (eq_attr "memory" "none")))
+  "atom-simple-either")
+
+(define_insn_reservation  "atom_test_mem" 1
+  (and (eq_attr "cpu" "atom")
+       (and (eq_attr "type" "test")
+            (eq_attr "memory" "!none")))
+  "atom-simple-either")
+
+(define_insn_reservation  "atom_ibr" 1
+  (and (eq_attr "cpu" "atom")
+       (and (eq_attr "type" "ibr")
+            (eq_attr "memory" "!load")))
+  "atom-simple-1")
+
+;; complex if jump target is from address
+(define_insn_reservation  "atom_ibr_2" 2
+  (and (eq_attr "cpu" "atom")
+       (and (eq_attr "type" "ibr")
+            (eq_attr "memory" "load")))
+  "atom-complex, atom-all-eu")
+
+(define_insn_reservation  "atom_setcc" 1
+  (and (eq_attr "cpu" "atom")
+       (and (eq_attr "type" "setcc")
+            (eq_attr "memory" "!store")))
+  "atom-simple-either")
+
+;; 2 cycles complex if target is in memory
+(define_insn_reservation  "atom_setcc_2" 2
+  (and (eq_attr "cpu" "atom")
+       (and (eq_attr "type" "setcc")
+            (eq_attr "memory" "store")))
+  "atom-complex, atom-all-eu")
+
+(define_insn_reservation  "atom_icmov" 1
+  (and (eq_attr "cpu" "atom")
+       (and (eq_attr "type" "icmov")
+            (eq_attr "memory" "none")))
+  "atom-simple-either")
+
+(define_insn_reservation  "atom_icmov_mem" 1
+  (and (eq_attr "cpu" "atom")
+       (and (eq_attr "type" "icmov")
+            (eq_attr "memory" "!none")))
+  "atom-simple-either")
+
+;; UCODE if segreg, ignored
+(define_insn_reservation  "atom_push" 2
+  (and (eq_attr "cpu" "atom")
+       (eq_attr "type" "push"))
+  "atom-dual-2c")
+
+;; pop r64 is 1 cycle. UCODE if segreg, ignored
+(define_insn_reservation  "atom_pop" 1
+  (and (eq_attr "cpu" "atom")
+       (and (eq_attr "type" "pop")
+            (eq_attr "mode" "DI")))
+  "atom-dual-1c")
+
+;; pop non-r64 is 2 cycles. UCODE if segreg, ignored
+(define_insn_reservation  "atom_pop_2" 2
+  (and (eq_attr "cpu" "atom")
+       (and (eq_attr "type" "pop")
+            (eq_attr "mode" "!DI")))
+  "atom-dual-2c")
+
+;; UCODE if segreg, ignored
+(define_insn_reservation  "atom_call" 1
+  (and (eq_attr "cpu" "atom")
+       (eq_attr "type" "call"))
+  "atom-dual-1c")
+
+(define_insn_reservation  "atom_callv" 1
+  (and (eq_attr "cpu" "atom")
+       (eq_attr "type" "callv"))
+  "atom-dual-1c")
+
+(define_insn_reservation  "atom_leave" 3
+  (and (eq_attr "cpu" "atom")
+       (eq_attr "type" "leave"))
+  "atom-complex, atom-all-eu*2")
+
+(define_insn_reservation  "atom_str" 3
+  (and (eq_attr "cpu" "atom")
+       (eq_attr "type" "str"))
+  "atom-complex, atom-all-eu*2")
+
+(define_insn_reservation  "atom_sselog" 1
+  (and (eq_attr "cpu" "atom")
+       (and (eq_attr "type" "sselog")
+            (eq_attr "memory" "none")))
+  "atom-simple-either")
+
+(define_insn_reservation  "atom_sselog_mem" 1
+  (and (eq_attr "cpu" "atom")
+       (and (eq_attr "type" "sselog")
+            (eq_attr "memory" "!none")))
+  "atom-simple-either")
+
+(define_insn_reservation  "atom_sselog1" 1
+  (and (eq_attr "cpu" "atom")
+       (and (eq_attr "type" "sselog1")
+            (eq_attr "memory" "none")))
+  "atom-simple-0")
+
+(define_insn_reservation  "atom_sselog1_mem" 1
+  (and (eq_attr "cpu" "atom")
+       (and (eq_attr "type" "sselog1")
+            (eq_attr "memory" "!none")))
+  "atom-simple-0")
+
+;; not pmadd, not psad
+(define_insn_reservation  "atom_sseiadd" 1
+  (and (eq_attr "cpu" "atom")
+       (and (eq_attr "type" "sseiadd")
+            (and (not (match_operand:V2DI 0 "register_operand"))
+                 (and (eq_attr "atom_unit" "!simul")
+                      (eq_attr "atom_unit" "!complex")))))
+  "atom-simple-either")
+
+;; pmadd, psad and 64
+(define_insn_reservation  "atom_sseiadd_2" 4
+  (and (eq_attr "cpu" "atom")
+       (and (eq_attr "type" "sseiadd")
+            (and (not (match_operand:V2DI 0 "register_operand"))
+                 (and (eq_attr "atom_unit" "simul" )
+                      (eq_attr "mode" "DI")))))
+  "atom-fmul-4c")
+
+;; pmadd, psad and 128
+(define_insn_reservation  "atom_sseiadd_3" 5
+  (and (eq_attr "cpu" "atom")
+       (and (eq_attr "type" "sseiadd")
+            (and (not (match_operand:V2DI 0 "register_operand"))
+                 (and (eq_attr "atom_unit" "simul" )
+                      (eq_attr "mode" "TI")))))
+  "atom-fmul-5c")
+
+;; if paddq(64 bit op), phadd/phsub
+(define_insn_reservation  "atom_sseiadd_4" 6
+  (and (eq_attr "cpu" "atom")
+       (and (eq_attr "type" "sseiadd")
+            (ior (match_operand:V2DI 0 "register_operand")
+                 (eq_attr "atom_unit" "complex"))))
+  "atom-complex, atom-all-eu*5")
+
+;; if immediate op. 
+(define_insn_reservation  "atom_sseishft" 1
+  (and (eq_attr "cpu" "atom")
+       (and (eq_attr "type" "sseishft")
+            (and (eq_attr "atom_unit" "!sishuf")
+                 (match_operand 2 "immediate_operand"))))
+  "atom-simple-either")
+
+;; if palignr or psrldq
+(define_insn_reservation  "atom_sseishft_2" 1
+  (and (eq_attr "cpu" "atom")
+       (and (eq_attr "type" "sseishft")
+            (and (eq_attr "atom_unit" "sishuf")
+                 (match_operand 2 "immediate_operand"))))
+  "atom-simple-0")
+
+;; if reg/mem op
+(define_insn_reservation  "atom_sseishft_3" 2
+  (and (eq_attr "cpu" "atom")
+       (and (eq_attr "type" "sseishft")
+            (not (match_operand 2 "immediate_operand"))))
+  "atom-complex, atom-all-eu")
+
+(define_insn_reservation  "atom_sseimul" 1
+  (and (eq_attr "cpu" "atom")
+       (eq_attr "type" "sseimul"))
+  "atom-simple-0")
+
+;; rcpss or rsqrtss
+(define_insn_reservation  "atom_sse" 4
+  (and (eq_attr "cpu" "atom")
+       (and (eq_attr "type" "sse")
+            (and (eq_attr "atom_sse_attr" "rcp") (eq_attr "mode" "SF"))))
+  "atom-fmul-4c")
+
+;; movshdup, movsldup. Suggest to type sseishft
+(define_insn_reservation  "atom_sse_2" 1
+  (and (eq_attr "cpu" "atom")
+       (and (eq_attr "type" "sse")
+            (eq_attr "atom_sse_attr" "movdup")))
+  "atom-simple-0")
+
+;; lfence
+(define_insn_reservation  "atom_sse_3" 1
+  (and (eq_attr "cpu" "atom")
+       (and (eq_attr "type" "sse")
+            (eq_attr "atom_sse_attr" "lfence")))
+  "atom-simple-either")
+
+;; sfence,clflush,mfence, prefetch
+(define_insn_reservation  "atom_sse_4" 1
+  (and (eq_attr "cpu" "atom")
+       (and (eq_attr "type" "sse")
+            (ior (eq_attr "atom_sse_attr" "fence")
+                 (eq_attr "atom_sse_attr" "prefetch"))))
+  "atom-simple-0")
+
+;; rcpps, rsqrtps, sqrt, ldmxcsr
+(define_insn_reservation  "atom_sse_5" 7
+  (and (eq_attr "cpu" "atom")
+       (and (eq_attr "type" "sse")
+            (ior (ior (eq_attr "atom_sse_attr" "sqrt")
+                      (eq_attr "atom_sse_attr" "mxcsr"))
+                 (and (eq_attr "atom_sse_attr" "rcp")
+                      (eq_attr "mode" "V4SF")))))
+  "atom-complex, atom-all-eu*6")
+
+;; xmm->xmm
+(define_insn_reservation  "atom_ssemov" 1
+  (and (eq_attr "cpu" "atom")
+       (and (eq_attr "type" "ssemov")
+            (and (match_operand 0 "register_operand" "xy") (match_operand 1 "register_operand" "xy"))))
+  "atom-simple-either")
+
+;; reg->xmm
+(define_insn_reservation  "atom_ssemov_2" 1
+  (and (eq_attr "cpu" "atom")
+       (and (eq_attr "type" "ssemov")
+            (and (match_operand 0 "register_operand" "xy") (match_operand 1 "register_operand" "r"))))
+  "atom-simple-0")
+
+;; xmm->reg
+(define_insn_reservation  "atom_ssemov_3" 3
+  (and (eq_attr "cpu" "atom")
+       (and (eq_attr "type" "ssemov")
+            (and (match_operand 0 "register_operand" "r") (match_operand 1 "register_operand" "xy"))))
+  "atom-eu-0-3-1")
+
+;; mov mem
+(define_insn_reservation  "atom_ssemov_4" 1
+  (and (eq_attr "cpu" "atom")
+       (and (eq_attr "type" "ssemov")
+            (and (eq_attr "movu" "0") (eq_attr "memory" "!none"))))
+  "atom-simple-0")
+
+;; movu mem
+(define_insn_reservation  "atom_ssemov_5" 2
+  (and (eq_attr "cpu" "atom")
+       (and (eq_attr "type" "ssemov")
+            (ior (eq_attr "movu" "1") (eq_attr "memory" "!none"))))
+  "atom-complex, atom-all-eu")
+
+;; no memory simple
+(define_insn_reservation  "atom_sseadd" 5
+  (and (eq_attr "cpu" "atom")
+       (and (eq_attr "type" "sseadd")
+            (and (eq_attr "memory" "none")
+                 (and (eq_attr "mode" "!V2DF")
+                      (eq_attr "atom_unit" "!complex")))))
+  "atom-fadd-5c")
+
+;; memory simple
+(define_insn_reservation  "atom_sseadd_mem" 5
+  (and (eq_attr "cpu" "atom")
+       (and (eq_attr "type" "sseadd")
+            (and (eq_attr "memory" "!none")
+                 (and (eq_attr "mode" "!V2DF")
+                      (eq_attr "atom_unit" "!complex")))))
+  "atom-dual-5c")
+
+;; maxps, minps, *pd, hadd, hsub
+(define_insn_reservation  "atom_sseadd_3" 8
+  (and (eq_attr "cpu" "atom")
+       (and (eq_attr "type" "sseadd")
+            (ior (eq_attr "mode" "V2DF") (eq_attr "atom_unit" "complex"))))
+  "atom-complex, atom-all-eu*7")
+
+;; Except dppd/dpps
+(define_insn_reservation  "atom_ssemul" 5
+  (and (eq_attr "cpu" "atom")
+       (and (eq_attr "type" "ssemul")
+            (eq_attr "mode" "!SF")))
+  "atom-fmul-5c")
+
+;; Except dppd/dpps, 4 cycle if mulss
+(define_insn_reservation  "atom_ssemul_2" 4
+  (and (eq_attr "cpu" "atom")
+       (and (eq_attr "type" "ssemul")
+            (eq_attr "mode" "SF")))
+  "atom-fmul-4c")
+
+(define_insn_reservation  "atom_ssecmp" 1
+  (and (eq_attr "cpu" "atom")
+       (eq_attr "type" "ssecmp"))
+  "atom-simple-either")
+
+(define_insn_reservation  "atom_ssecomi" 10
+  (and (eq_attr "cpu" "atom")
+       (eq_attr "type" "ssecomi"))
+  "atom-complex, atom-all-eu*9")
+
+;; no memory and cvtpi2ps, cvtps2pi, cvttps2pi
+(define_insn_reservation  "atom_ssecvt" 5
+  (and (eq_attr "cpu" "atom")
+       (and (eq_attr "type" "ssecvt")
+            (ior (and (match_operand:V2SI 0 "register_operand")
+                      (match_operand:V4SF 1 "register_operand"))
+                 (and (match_operand:V4SF 0 "register_operand")
+                      (match_operand:V2SI 1 "register_operand")))))
+  "atom-fadd-5c")
+
+;; memory and cvtpi2ps, cvtps2pi, cvttps2pi
+(define_insn_reservation  "atom_ssecvt_2" 5
+  (and (eq_attr "cpu" "atom")
+       (and (eq_attr "type" "ssecvt")
+            (ior (and (match_operand:V2SI 0 "register_operand")
+                      (match_operand:V4SF 1 "memory_operand"))
+                 (and (match_operand:V4SF 0 "register_operand")
+                      (match_operand:V2SI 1 "memory_operand")))))
+  "atom-dual-5c")
+
+;; otherwise. 7 cycles average for cvtss2sd
+(define_insn_reservation  "atom_ssecvt_3" 7
+  (and (eq_attr "cpu" "atom")
+       (and (eq_attr "type" "ssecvt")
+            (not (ior (and (match_operand:V2SI 0 "register_operand")
+                           (match_operand:V4SF 1 "nonimmediate_operand"))
+                      (and (match_operand:V4SF 0 "register_operand")
+                           (match_operand:V2SI 1 "nonimmediate_operand"))))))
+  "atom-complex, atom-all-eu*6")
+
+;; memory and cvtsi2sd
+(define_insn_reservation  "atom_sseicvt" 5
+  (and (eq_attr "cpu" "atom")
+       (and (eq_attr "type" "sseicvt")
+            (and (match_operand:V2DF 0 "register_operand")
+                 (match_operand:SI 1 "memory_operand"))))
+  "atom-dual-5c")
+
+;; otherwise. 8 cycles average for cvtsd2si
+(define_insn_reservation  "atom_sseicvt_2" 8
+  (and (eq_attr "cpu" "atom")
+       (and (eq_attr "type" "sseicvt")
+            (not (and (match_operand:V2DF 0 "register_operand")
+                      (match_operand:SI 1 "memory_operand")))))
+  "atom-complex, atom-all-eu*7")
+
+(define_insn_reservation  "atom_ssediv" 62
+  (and (eq_attr "cpu" "atom")
+       (eq_attr "type" "ssediv"))
+  "atom-complex, atom-all-eu*12, nothing*49")
+
+;; simple for fmov
+(define_insn_reservation  "atom_fmov" 1
+  (and (eq_attr "cpu" "atom")
+       (and (eq_attr "type" "fmov")
+            (eq_attr "memory" "none")))
+  "atom-simple-either")
+
+;; simple for fmov
+(define_insn_reservation  "atom_fmov_mem" 1
+  (and (eq_attr "cpu" "atom")
+       (and (eq_attr "type" "fmov")
+            (eq_attr "memory" "!none")))
+  "atom-simple-either")
+
+;; Define bypass here
+
+;; There will be no stall from lea to non-mem EX insns
+(define_bypass 0 "atom_lea"
+                 "atom_alu_carry,
+                  atom_alu,atom_alu1,atom_negnot,atom_imov,atom_imovx,
+                  atom_incdec, atom_setcc, atom_icmov, atom_pop")
+
+(define_bypass 0 "atom_lea"
+                 "atom_alu_mem, atom_alu_carry_mem, atom_alu1_mem,
+                  atom_imovx_mem, atom_imovx_2_mem,
+                  atom_imov_mem, atom_icmov_mem, atom_fmov_mem"
+                 "!ix86_agi_dependent")
+
+;; There will be 3 cycles stall from EX insns to AGEN insns like LEA
+(define_bypass 4 "atom_alu_carry,
+                  atom_alu,atom_alu1,atom_negnot,atom_imov,atom_imovx,
+                  atom_incdec,atom_ishift,atom_ishift1,atom_rotate,
+                  atom_rotate1, atom_setcc, atom_icmov, atom_pop,
+                  atom_alu_mem, atom_alu_carry_mem, atom_alu1_mem,
+                  atom_imovx_mem, atom_imovx_2_mem,
+                  atom_imov_mem, atom_icmov_mem, atom_fmov_mem"
+                 "atom_lea")
+
+;; There will be 3 cycles stall from EX insns to insns that need address calculation
+(define_bypass 4 "atom_alu_carry,
+                  atom_alu,atom_alu1,atom_negnot,atom_imov,atom_imovx,
+                  atom_incdec,atom_ishift,atom_ishift1,atom_rotate,
+                  atom_rotate1, atom_setcc, atom_icmov, atom_pop,
+                  atom_imovx_mem, atom_imovx_2_mem,
+                  atom_alu_mem, atom_alu_carry_mem, atom_alu1_mem,
+                  atom_imov_mem, atom_icmov_mem, atom_fmov_mem"
+                 "atom_alu_mem, atom_alu_carry_mem, atom_alu1_mem,
+                  atom_negnot_mem, atom_imov_mem, atom_incdec_mem,
+                  atom_imovx_mem, atom_imovx_2_mem,
+                  atom_imul_mem, atom_icmp_mem,
+                  atom_test_mem, atom_icmov_mem, atom_sselog_mem,
+                  atom_sselog1_mem, atom_fmov_mem, atom_sseadd_mem,
+                  atom_ishift_mem, atom_ishift1_mem, 
+                  atom_rotate_mem, atom_rotate1_mem"
+                  "ix86_agi_dependent")
+
+;; Stall from imul to lea is 8 cycles.
+(define_bypass 9 "atom_imul, atom_imul_mem" "atom_lea")
+
+;; Stall from imul to memory address is 8 cycles.
+(define_bypass 9 "atom_imul, atom_imul_mem" 
+                 "atom_alu_mem, atom_alu_carry_mem, atom_alu1_mem,
+                  atom_negnot_mem, atom_imov_mem, atom_incdec_mem,
+                  atom_ishift_mem, atom_ishift1_mem, atom_rotate_mem,
+                  atom_rotate1_mem, atom_imul_mem, atom_icmp_mem,
+                  atom_test_mem, atom_icmov_mem, atom_sselog_mem,
+                  atom_sselog1_mem, atom_fmov_mem, atom_sseadd_mem"
+                  "ix86_agi_dependent")
+
+;; There will be 0 cycle stall from cmp/test to jcc
+
+;; There will be 1 cycle stall from flag producer to cmov and adc/sbb
+(define_bypass 2 "atom_icmp, atom_test, atom_alu, atom_alu_carry,
+                  atom_alu1, atom_negnot, atom_incdec, atom_ishift,
+                  atom_ishift1, atom_rotate, atom_rotate1"
+                 "atom_icmov, atom_alu_carry")
+
+;; lea to shift count stall is 2 cycles
+(define_bypass 3 "atom_lea"
+                 "atom_ishift, atom_ishift1, atom_rotate, atom_rotate1,
+                  atom_ishift_mem, atom_ishift1_mem, 
+                  atom_rotate_mem, atom_rotate1_mem"
+                 "ix86_dep_by_shift_count")
+
+;; lea to shift source stall is 1 cycle
+(define_bypass 2 "atom_lea"
+                 "atom_ishift, atom_ishift1, atom_rotate, atom_rotate1"
+                 "!ix86_dep_by_shift_count")
+
+;; non-lea to shift count stall is 1 cycle
+(define_bypass 2 "atom_alu_carry,
+                  atom_alu,atom_alu1,atom_negnot,atom_imov,atom_imovx,
+                  atom_incdec,atom_ishift,atom_ishift1,atom_rotate,
+                  atom_rotate1, atom_setcc, atom_icmov, atom_pop,
+                  atom_alu_mem, atom_alu_carry_mem, atom_alu1_mem,
+                  atom_imovx_mem, atom_imovx_2_mem,
+                  atom_imov_mem, atom_icmov_mem, atom_fmov_mem"
+                 "atom_ishift, atom_ishift1, atom_rotate, atom_rotate1,
+                  atom_ishift_mem, atom_ishift1_mem, 
+                  atom_rotate_mem, atom_rotate1_mem"
+                 "ix86_dep_by_shift_count")
Index: gcc/config/i386/sse.md
===================================================================
--- a/src/gcc/config/i386/sse.md	(revision 146514)
+++ b/src/gcc/config/i386/sse.md	(working copy)
@@ -338,6 +338,7 @@
    && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
   "vmovup<avxmodesuffixf2c>\t{%1, %0|%0, %1}"
   [(set_attr "type" "ssemov")
+   (set_attr "movu" "1")
    (set_attr "prefix" "vex")
    (set_attr "mode" "<MODE>")])
 
@@ -363,6 +364,7 @@
    && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
   "movup<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
   [(set_attr "type" "ssemov")
+   (set_attr "movu" "1")
    (set_attr "mode" "<MODE>")])
 
 (define_insn "avx_movdqu<avxmodesuffix>"
@@ -373,6 +375,7 @@
   "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
   "vmovdqu\t{%1, %0|%0, %1}"
   [(set_attr "type" "ssemov")
+   (set_attr "movu" "1")
    (set_attr "prefix" "vex")
    (set_attr "mode" "<avxvecmode>")])
 
@@ -383,6 +386,7 @@
   "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
   "movdqu\t{%1, %0|%0, %1}"
   [(set_attr "type" "ssemov")
+   (set_attr "movu" "1")
    (set_attr "prefix_data16" "1")
    (set_attr "mode" "TI")])
 
@@ -424,7 +428,7 @@
 		     UNSPEC_MOVNT))]
   "TARGET_SSE2"
   "movntdq\t{%1, %0|%0, %1}"
-  [(set_attr "type" "ssecvt")
+  [(set_attr "type" "ssemov")
    (set_attr "prefix_data16" "1")
    (set_attr "mode" "TI")])
 
@@ -434,7 +438,7 @@
 		   UNSPEC_MOVNT))]
   "TARGET_SSE2"
   "movnti\t{%1, %0|%0, %1}"
-  [(set_attr "type" "ssecvt")
+  [(set_attr "type" "ssemov")
    (set_attr "mode" "V2DF")])
 
 (define_insn "avx_lddqu<avxmodesuffix>"
@@ -445,6 +449,7 @@
   "TARGET_AVX"
   "vlddqu\t{%1, %0|%0, %1}"
   [(set_attr "type" "ssecvt")
+   (set_attr "movu" "1")
    (set_attr "prefix" "vex")
    (set_attr "mode" "<avxvecmode>")])
 
@@ -454,7 +459,8 @@
 		      UNSPEC_LDDQU))]
   "TARGET_SSE3"
   "lddqu\t{%1, %0|%0, %1}"
-  [(set_attr "type" "ssecvt")
+  [(set_attr "type" "ssemov")
+   (set_attr "movu" "1")
    (set_attr "prefix_rep" "1")
    (set_attr "mode" "TI")])
 
@@ -761,6 +767,7 @@
   "TARGET_SSE"
   "%vrcpps\t{%1, %0|%0, %1}"
   [(set_attr "type" "sse")
+   (set_attr "atom_sse_attr" "rcp")
    (set_attr "prefix" "maybe_vex")
    (set_attr "mode" "V4SF")])
 
@@ -787,6 +794,7 @@
   "TARGET_SSE"
   "rcpss\t{%1, %0|%0, %1}"
   [(set_attr "type" "sse")
+   (set_attr "atom_sse_attr" "rcp")
    (set_attr "mode" "SF")])
 
 (define_expand "sqrtv8sf2"
@@ -832,6 +840,7 @@
   "TARGET_SSE"
   "%vsqrtps\t{%1, %0|%0, %1}"
   [(set_attr "type" "sse")
+   (set_attr "atom_sse_attr" "sqrt")
    (set_attr "prefix" "maybe_vex")
    (set_attr "mode" "V4SF")])
 
@@ -876,6 +885,7 @@
   "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
   "sqrts<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
   [(set_attr "type" "sse")
+   (set_attr "atom_sse_attr" "sqrt")
    (set_attr "mode" "<ssescalarmode>")])
 
 (define_expand "rsqrtv8sf2"
@@ -1039,7 +1049,7 @@
 	 (const_int 1)))]
   "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
   "<maxminfprefix>s<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sse")
+  [(set_attr "type" "sseadd")
    (set_attr "mode" "<ssescalarmode>")])
 
 ;; These versions of the min/max patterns implement exactly the operations
@@ -1175,6 +1185,7 @@
   "TARGET_SSE3"
   "addsubpd\t{%2, %0|%0, %2}"
   [(set_attr "type" "sseadd")
+   (set_attr "atom_unit" "complex")
    (set_attr "mode" "V2DF")])
 
 (define_insn "avx_h<plusminus_insn>v4df3"
@@ -1298,6 +1309,7 @@
   "TARGET_SSE3"
   "h<plusminus_mnemonic>ps\t{%2, %0|%0, %2}"
   [(set_attr "type" "sseadd")
+   (set_attr "atom_unit" "complex")
    (set_attr "prefix_rep" "1")
    (set_attr "mode" "V4SF")])
 
@@ -5066,6 +5078,7 @@
   "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
   "pmaddwd\t{%2, %0|%0, %2}"
   [(set_attr "type" "sseiadd")
+   (set_attr "atom_unit" "simul")
    (set_attr "prefix_data16" "1")
    (set_attr "mode" "TI")])
 
@@ -7025,6 +7038,7 @@
    movq\t{%H1, %0|%0, %H1}
    mov{q}\t{%H1, %0|%0, %H1}"
   [(set_attr "type" "ssemov,sseishft,ssemov,imov")
+   (set_attr "atom_unit" "*,sishuf,*,*")
    (set_attr "memory" "*,none,*,*")
    (set_attr "mode" "V2SF,TI,TI,DI")])
 
@@ -7057,6 +7071,7 @@
    psrldq\t{$8, %0|%0, 8}
    movq\t{%H1, %0|%0, %H1}"
   [(set_attr "type" "ssemov,sseishft,ssemov")
+   (set_attr "atom_unit" "*,sishuf,*")
    (set_attr "memory" "*,none,*")
    (set_attr "mode" "V2SF,TI,TI")])
 
@@ -7614,6 +7629,7 @@
   "TARGET_SSE2"
   "psadbw\t{%2, %0|%0, %2}"
   [(set_attr "type" "sseiadd")
+   (set_attr "atom_unit" "simul")
    (set_attr "prefix_data16" "1")
    (set_attr "mode" "TI")])
 
@@ -7635,7 +7651,7 @@
 	  UNSPEC_MOVMSK))]
   "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
   "%vmovmskp<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
-  [(set_attr "type" "ssecvt")
+  [(set_attr "type" "ssemov")
    (set_attr "prefix" "maybe_vex")
    (set_attr "mode" "<MODE>")])
 
@@ -7645,7 +7661,7 @@
 		   UNSPEC_MOVMSK))]
   "TARGET_SSE2"
   "%vpmovmskb\t{%1, %0|%0, %1}"
-  [(set_attr "type" "ssecvt")
+  [(set_attr "type" "ssemov")
    (set_attr "prefix_data16" "1")
    (set_attr "prefix" "maybe_vex")
    (set_attr "mode" "SI")])
@@ -7668,7 +7684,7 @@
   "TARGET_SSE2 && !TARGET_64BIT"
   ;; @@@ check ordering of operands in intel/nonintel syntax
   "%vmaskmovdqu\t{%2, %1|%1, %2}"
-  [(set_attr "type" "ssecvt")
+  [(set_attr "type" "ssemov")
    (set_attr "prefix_data16" "1")
    (set_attr "prefix" "maybe_vex")
    (set_attr "mode" "TI")])
@@ -7682,7 +7698,7 @@
   "TARGET_SSE2 && TARGET_64BIT"
   ;; @@@ check ordering of operands in intel/nonintel syntax
   "%vmaskmovdqu\t{%2, %1|%1, %2}"
-  [(set_attr "type" "ssecvt")
+  [(set_attr "type" "ssemov")
    (set_attr "prefix_data16" "1")
    (set_attr "prefix" "maybe_vex")
    (set_attr "mode" "TI")])
@@ -7693,6 +7709,7 @@
   "TARGET_SSE"
   "%vldmxcsr\t%0"
   [(set_attr "type" "sse")
+   (set_attr "atom_sse_attr" "mxcsr")
    (set_attr "prefix" "maybe_vex")
    (set_attr "memory" "load")])
 
@@ -7702,6 +7719,7 @@
   "TARGET_SSE"
   "%vstmxcsr\t%0"
   [(set_attr "type" "sse")
+   (set_attr "atom_sse_attr" "mxcsr")
    (set_attr "prefix" "maybe_vex")
    (set_attr "memory" "store")])
 
@@ -7720,6 +7738,7 @@
   "TARGET_SSE || TARGET_3DNOW_A"
   "sfence"
   [(set_attr "type" "sse")
+   (set_attr "atom_sse_attr" "fence")
    (set_attr "memory" "unknown")])
 
 (define_insn "sse2_clflush"
@@ -7728,6 +7747,7 @@
   "TARGET_SSE2"
   "clflush\t%a0"
   [(set_attr "type" "sse")
+   (set_attr "atom_sse_attr" "fence")
    (set_attr "memory" "unknown")])
 
 (define_expand "sse2_mfence"
@@ -7745,6 +7765,7 @@
   "TARGET_64BIT || TARGET_SSE2"
   "mfence"
   [(set_attr "type" "sse")
+   (set_attr "atom_sse_attr" "fence")
    (set_attr "memory" "unknown")])
 
 (define_expand "sse2_lfence"
@@ -7762,6 +7783,7 @@
   "TARGET_SSE2"
   "lfence"
   [(set_attr "type" "sse")
+   (set_attr "atom_sse_attr" "lfence")
    (set_attr "memory" "unknown")])
 
 (define_insn "sse3_mwait"
@@ -7885,6 +7907,7 @@
   "TARGET_SSSE3"
   "phaddw\t{%2, %0|%0, %2}"
   [(set_attr "type" "sseiadd")
+   (set_attr "atom_unit" "complex")
    (set_attr "prefix_data16" "1")
    (set_attr "prefix_extra" "1")
    (set_attr "mode" "TI")])
@@ -7913,6 +7936,7 @@
   "TARGET_SSSE3"
   "phaddw\t{%2, %0|%0, %2}"
   [(set_attr "type" "sseiadd")
+   (set_attr "atom_unit" "complex")
    (set_attr "prefix_extra" "1")
    (set_attr "mode" "DI")])
 
@@ -7967,6 +7991,7 @@
   "TARGET_SSSE3"
   "phaddd\t{%2, %0|%0, %2}"
   [(set_attr "type" "sseiadd")
+   (set_attr "atom_unit" "complex")
    (set_attr "prefix_data16" "1")
    (set_attr "prefix_extra" "1")
    (set_attr "mode" "TI")])
@@ -7987,6 +8012,7 @@
   "TARGET_SSSE3"
   "phaddd\t{%2, %0|%0, %2}"
   [(set_attr "type" "sseiadd")
+   (set_attr "atom_unit" "complex")
    (set_attr "prefix_extra" "1")
    (set_attr "mode" "DI")])
 
@@ -8073,6 +8099,7 @@
   "TARGET_SSSE3"
   "phaddsw\t{%2, %0|%0, %2}"
   [(set_attr "type" "sseiadd")
+   (set_attr "atom_unit" "complex")
    (set_attr "prefix_data16" "1")
    (set_attr "prefix_extra" "1")
    (set_attr "mode" "TI")])
@@ -8101,6 +8128,7 @@
   "TARGET_SSSE3"
   "phaddsw\t{%2, %0|%0, %2}"
   [(set_attr "type" "sseiadd")
+   (set_attr "atom_unit" "complex")
    (set_attr "prefix_extra" "1")
    (set_attr "mode" "DI")])
 
@@ -8187,6 +8215,7 @@
   "TARGET_SSSE3"
   "phsubw\t{%2, %0|%0, %2}"
   [(set_attr "type" "sseiadd")
+   (set_attr "atom_unit" "complex")
    (set_attr "prefix_data16" "1")
    (set_attr "prefix_extra" "1")
    (set_attr "mode" "TI")])
@@ -8215,6 +8244,7 @@
   "TARGET_SSSE3"
   "phsubw\t{%2, %0|%0, %2}"
   [(set_attr "type" "sseiadd")
+   (set_attr "atom_unit" "complex")
    (set_attr "prefix_extra" "1")
    (set_attr "mode" "DI")])
 
@@ -8269,6 +8299,7 @@
   "TARGET_SSSE3"
   "phsubd\t{%2, %0|%0, %2}"
   [(set_attr "type" "sseiadd")
+   (set_attr "atom_unit" "complex")
    (set_attr "prefix_data16" "1")
    (set_attr "prefix_extra" "1")
    (set_attr "mode" "TI")])
@@ -8289,6 +8320,7 @@
   "TARGET_SSSE3"
   "phsubd\t{%2, %0|%0, %2}"
   [(set_attr "type" "sseiadd")
+   (set_attr "atom_unit" "complex")
    (set_attr "prefix_extra" "1")
    (set_attr "mode" "DI")])
 
@@ -8375,6 +8407,7 @@
   "TARGET_SSSE3"
   "phsubsw\t{%2, %0|%0, %2}"
   [(set_attr "type" "sseiadd")
+   (set_attr "atom_unit" "complex")
    (set_attr "prefix_data16" "1")
    (set_attr "prefix_extra" "1")
    (set_attr "mode" "TI")])
@@ -8403,6 +8436,7 @@
   "TARGET_SSSE3"
   "phsubsw\t{%2, %0|%0, %2}"
   [(set_attr "type" "sseiadd")
+   (set_attr "atom_unit" "complex")
    (set_attr "prefix_extra" "1")
    (set_attr "mode" "DI")])
 
@@ -8509,6 +8543,7 @@
   "TARGET_SSSE3"
   "pmaddubsw\t{%2, %0|%0, %2}"
   [(set_attr "type" "sseiadd")
+   (set_attr "atom_unit" "simul")
    (set_attr "prefix_data16" "1")
    (set_attr "prefix_extra" "1")
    (set_attr "mode" "TI")])
@@ -8547,6 +8582,7 @@
   "TARGET_SSSE3"
   "pmaddubsw\t{%2, %0|%0, %2}"
   [(set_attr "type" "sseiadd")
+   (set_attr "atom_unit" "simul")
    (set_attr "prefix_extra" "1")
    (set_attr "mode" "DI")])
 
@@ -8754,6 +8790,7 @@
   return "palignr\t{%3, %2, %0|%0, %2, %3}";
 }
   [(set_attr "type" "sseishft")
+   (set_attr "atom_unit" "sishuf")
    (set_attr "prefix_data16" "1")
    (set_attr "prefix_extra" "1")
    (set_attr "mode" "TI")])
@@ -8770,6 +8807,7 @@
   return "palignr\t{%3, %2, %0|%0, %2, %3}";
 }
   [(set_attr "type" "sseishft")
+   (set_attr "atom_unit" "sishuf")
    (set_attr "prefix_extra" "1")
    (set_attr "mode" "DI")])
 
@@ -8956,7 +8994,7 @@
 		     UNSPEC_MOVNTDQA))]
   "TARGET_SSE4_1"
   "%vmovntdqa\t{%1, %0|%0, %1}"
-  [(set_attr "type" "ssecvt")
+  [(set_attr "type" "ssemov")
    (set_attr "prefix_extra" "1")
    (set_attr "prefix" "maybe_vex")
    (set_attr "mode" "TI")])
Index: gcc/config/i386/i386-c.c
===================================================================
--- a/src/gcc/config/i386/i386-c.c	(revision 146514)
+++ b/src/gcc/config/i386/i386-c.c	(working copy)
@@ -119,6 +119,10 @@
       def_or_undef (parse_in, "__core2");
       def_or_undef (parse_in, "__core2__");
       break;
+    case PROCESSOR_ATOM:
+      def_or_undef (parse_in, "__atom");
+      def_or_undef (parse_in, "__atom__");
+      break;
     /* use PROCESSOR_max to not set/unset the arch macro.  */
     case PROCESSOR_max:
       break;
@@ -187,6 +191,9 @@
     case PROCESSOR_CORE2:
       def_or_undef (parse_in, "__tune_core2__");
       break;
+    case PROCESSOR_ATOM:
+      def_or_undef (parse_in, "__tune_atom__");
+      break;
     case PROCESSOR_GENERIC32:
     case PROCESSOR_GENERIC64:
       break;
Index: gcc/config/i386/i386-protos.h
===================================================================
--- a/src/gcc/config/i386/i386-protos.h	(revision 146514)
+++ b/src/gcc/config/i386/i386-protos.h	(working copy)
@@ -85,6 +85,9 @@
 extern void ix86_expand_binary_operator (enum rtx_code,
 					 enum machine_mode, rtx[]);
 extern int ix86_binary_operator_ok (enum rtx_code, enum machine_mode, rtx[]);
+extern bool ix86_lea_for_add_ok (enum rtx_code, rtx, rtx[]);
+extern bool ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn);
+extern bool ix86_agi_dependent (rtx set_insn, rtx use_insn);
 extern void ix86_expand_unary_operator (enum rtx_code, enum machine_mode,
 					rtx[]);
 extern rtx ix86_build_const_vector (enum machine_mode, bool, rtx);
Index: gcc/config/i386/i386.c
===================================================================
--- a/src/gcc/config/i386/i386.c	(revision 146514)
+++ b/src/gcc/config/i386/i386.c	(working copy)
@@ -1036,6 +1036,79 @@
   1,                                    /* cond_not_taken_branch_cost.  */
 };
 
+static const
+struct processor_costs atom_cost = {
+  COSTS_N_INSNS (1),			/* cost of an add instruction */
+  COSTS_N_INSNS (1) + 1,		/* cost of a lea instruction */
+  COSTS_N_INSNS (1),			/* variable shift costs */
+  COSTS_N_INSNS (1),			/* constant shift costs */
+  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
+   COSTS_N_INSNS (4),			/*                               HI */
+   COSTS_N_INSNS (3),			/*                               SI */
+   COSTS_N_INSNS (4),			/*                               DI */
+   COSTS_N_INSNS (2)},			/*                               other */
+  0,					/* cost of multiply per each bit set */
+  {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
+   COSTS_N_INSNS (26),			/*                          HI */
+   COSTS_N_INSNS (42),			/*                          SI */
+   COSTS_N_INSNS (74),			/*                          DI */
+   COSTS_N_INSNS (74)},			/*                          other */
+  COSTS_N_INSNS (1),			/* cost of movsx */
+  COSTS_N_INSNS (1),			/* cost of movzx */
+  8,					/* "large" insn */
+  17,					/* MOVE_RATIO */
+  2,					/* cost for loading QImode using movzbl */
+  {4, 4, 4},				/* cost of loading integer registers
+					   in QImode, HImode and SImode.
+					   Relative to reg-reg move (2).  */
+  {4, 4, 4},				/* cost of storing integer registers */
+  4,					/* cost of reg,reg fld/fst */
+  {12, 12, 12},				/* cost of loading fp registers
+					   in SFmode, DFmode and XFmode */
+  {6, 6, 8},				/* cost of storing fp registers
+					   in SFmode, DFmode and XFmode */
+  2,					/* cost of moving MMX register */
+  {8, 8},				/* cost of loading MMX registers
+					   in SImode and DImode */
+  {8, 8},				/* cost of storing MMX registers
+					   in SImode and DImode */
+  2,					/* cost of moving SSE register */
+  {8, 8, 8},				/* cost of loading SSE registers
+					   in SImode, DImode and TImode */
+  {8, 8, 8},				/* cost of storing SSE registers
+					   in SImode, DImode and TImode */
+  5,					/* MMX or SSE register to integer */
+  32,					/* size of l1 cache.  */
+  256,					/* size of l2 cache.  */
+  64,					/* size of prefetch block */
+  6,					/* number of parallel prefetches */
+  3,					/* Branch cost */
+  COSTS_N_INSNS (8),			/* cost of FADD and FSUB insns.  */
+  COSTS_N_INSNS (8),			/* cost of FMUL instruction.  */
+  COSTS_N_INSNS (20),			/* cost of FDIV instruction.  */
+  COSTS_N_INSNS (8),			/* cost of FABS instruction.  */
+  COSTS_N_INSNS (8),			/* cost of FCHS instruction.  */
+  COSTS_N_INSNS (40),			/* cost of FSQRT instruction.  */
+  {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
+   {libcall, {{32, loop}, {64, rep_prefix_4_byte},
+          {8192, rep_prefix_8_byte}, {-1, libcall}}}},
+  {{libcall, {{8, loop}, {15, unrolled_loop},
+          {2048, rep_prefix_4_byte}, {-1, libcall}}},
+   {libcall, {{24, loop}, {32, unrolled_loop},
+          {8192, rep_prefix_8_byte}, {-1, libcall}}}},
+  1,                                    /* scalar_stmt_cost.  */
+  1,                                    /* scalar load_cost.  */
+  1,                                    /* scalar_store_cost.  */
+  1,                                    /* vec_stmt_cost.  */
+  1,                                    /* vec_to_scalar_cost.  */
+  1,                                    /* scalar_to_vec_cost.  */
+  1,                                    /* vec_align_load_cost.  */
+  2,                                    /* vec_unalign_load_cost.  */
+  1,                                    /* vec_store_cost.  */
+  3,                                    /* cond_taken_branch_cost.  */
+  1,                                    /* cond_not_taken_branch_cost.  */
+};
+
 /* Generic64 should produce code tuned for Nocona and K8.  */
 static const
 struct processor_costs generic64_cost = {
@@ -1194,6 +1267,7 @@
 #define m_PENT4  (1<<PROCESSOR_PENTIUM4)
 #define m_NOCONA  (1<<PROCESSOR_NOCONA)
 #define m_CORE2  (1<<PROCESSOR_CORE2)
+#define m_ATOM  (1<<PROCESSOR_ATOM)
 
 #define m_GEODE  (1<<PROCESSOR_GEODE)
 #define m_K6  (1<<PROCESSOR_K6)
@@ -1231,10 +1305,11 @@
   m_486 | m_PENT,
 
   /* X86_TUNE_UNROLL_STRLEN */
-  m_486 | m_PENT | m_PPRO | m_AMD_MULTIPLE | m_K6 | m_CORE2 | m_GENERIC,
+  m_486 | m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_K6
+  | m_CORE2 | m_GENERIC,
 
   /* X86_TUNE_DEEP_BRANCH_PREDICTION */
-  m_PPRO | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4 | m_GENERIC,
+  m_ATOM | m_PPRO | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4 | m_GENERIC,
 
   /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
      on simulation result. But after P4 was made, no performance benefit
@@ -1246,12 +1321,12 @@
   ~m_386,
 
   /* X86_TUNE_USE_SAHF */
-  m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_PENT4
+  m_ATOM | m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_PENT4
   | m_NOCONA | m_CORE2 | m_GENERIC,
 
   /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
      partial dependencies.  */
-  m_AMD_MULTIPLE | m_PPRO | m_PENT4 | m_NOCONA
+  m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_PENT4 | m_NOCONA
   | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,
 
   /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
@@ -1271,13 +1346,13 @@
   m_386 | m_486 | m_K6_GEODE,
 
   /* X86_TUNE_USE_SIMODE_FIOP */
-  ~(m_PPRO | m_AMD_MULTIPLE | m_PENT | m_CORE2 | m_GENERIC),
+  ~(m_PPRO | m_AMD_MULTIPLE | m_PENT | m_ATOM | m_CORE2 | m_GENERIC),
 
   /* X86_TUNE_USE_MOV0 */
   m_K6,
 
   /* X86_TUNE_USE_CLTD */
-  ~(m_PENT | m_K6 | m_CORE2 | m_GENERIC),
+  ~(m_PENT | m_ATOM | m_K6 | m_CORE2 | m_GENERIC),
 
   /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx.  */
   m_PENT4,
@@ -1292,8 +1367,8 @@
   ~(m_PENT | m_PPRO),
 
   /* X86_TUNE_PROMOTE_QIMODE */
-  m_K6_GEODE | m_PENT | m_386 | m_486 | m_AMD_MULTIPLE | m_CORE2
-  | m_GENERIC /* | m_PENT4 ? */,
+  m_K6_GEODE | m_PENT | m_ATOM | m_386 | m_486 | m_AMD_MULTIPLE
+  | m_CORE2 | m_GENERIC /* | m_PENT4 ? */,
 
   /* X86_TUNE_FAST_PREFIX */
   ~(m_PENT | m_486 | m_386),
@@ -1317,26 +1392,28 @@
   m_PPRO,
 
   /* X86_TUNE_ADD_ESP_4: Enable if add/sub is preferred over 1/2 push/pop.  */
-  m_AMD_MULTIPLE | m_K6_GEODE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
+  m_ATOM | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT4 | m_NOCONA
+  | m_CORE2 | m_GENERIC,
 
   /* X86_TUNE_ADD_ESP_8 */
-  m_AMD_MULTIPLE | m_PPRO | m_K6_GEODE | m_386
+  m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_K6_GEODE | m_386
   | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
 
   /* X86_TUNE_SUB_ESP_4 */
-  m_AMD_MULTIPLE | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
+  m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2
+  | m_GENERIC,
 
   /* X86_TUNE_SUB_ESP_8 */
-  m_AMD_MULTIPLE | m_PPRO | m_386 | m_486
+  m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_386 | m_486
   | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
 
   /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
      for DFmode copies */
-  ~(m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
+  ~(m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
     | m_GENERIC | m_GEODE),
 
   /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
-  m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
+  m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
 
   /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
      conflict here in between PPro/Pentium4 based chips that thread 128bit
@@ -1347,7 +1424,8 @@
      shows that disabling this option on P4 brings over 20% SPECfp regression,
      while enabling it on K8 brings roughly 2.4% regression that can be partly
      masked by careful scheduling of moves.  */
-  m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC | m_AMDFAM10,
+  m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC
+  | m_AMDFAM10,
 
   /* X86_TUNE_SSE_UNALIGNED_MOVE_OPTIMAL */
   m_AMDFAM10,
@@ -1365,13 +1443,13 @@
   m_PPRO | m_PENT4 | m_NOCONA,
 
   /* X86_TUNE_MEMORY_MISMATCH_STALL */
-  m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
+  m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
 
   /* X86_TUNE_PROLOGUE_USING_MOVE */
-  m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,
+  m_ATHLON_K8 | m_ATOM | m_PPRO | m_CORE2 | m_GENERIC,
 
   /* X86_TUNE_EPILOGUE_USING_MOVE */
-  m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,
+  m_ATHLON_K8 | m_ATOM | m_PPRO | m_CORE2 | m_GENERIC,
 
   /* X86_TUNE_SHIFT1 */
   ~m_486,
@@ -1380,29 +1458,32 @@
   m_AMD_MULTIPLE,
 
   /* X86_TUNE_INTER_UNIT_MOVES */
-  ~(m_AMD_MULTIPLE | m_GENERIC),
+  ~(m_AMD_MULTIPLE | m_ATOM | m_GENERIC),
 
   /* X86_TUNE_INTER_UNIT_CONVERSIONS */
   ~(m_AMDFAM10),
 
   /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
      than 4 branch instructions in the 16 byte window.  */
-  m_PPRO | m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
+  m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2
+  | m_GENERIC,
 
   /* X86_TUNE_SCHEDULE */
-  m_PPRO | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT | m_CORE2 | m_GENERIC,
+  m_PPRO | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT | m_ATOM | m_CORE2
+  | m_GENERIC,
 
   /* X86_TUNE_USE_BT */
-  m_AMD_MULTIPLE | m_CORE2 | m_GENERIC,
+  m_AMD_MULTIPLE | m_ATOM | m_CORE2 | m_GENERIC,
 
   /* X86_TUNE_USE_INCDEC */
-  ~(m_PENT4 | m_NOCONA | m_GENERIC),
+  ~(m_PENT4 | m_NOCONA | m_GENERIC | m_ATOM),
 
   /* X86_TUNE_PAD_RETURNS */
   m_AMD_MULTIPLE | m_CORE2 | m_GENERIC,
 
   /* X86_TUNE_EXT_80387_CONSTANTS */
-  m_K6_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC,
+  m_K6_GEODE | m_ATHLON_K8 | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO
+  | m_CORE2 | m_GENERIC,
 
   /* X86_TUNE_SHORTEN_X87_SSE */
   ~m_K8,
@@ -1447,6 +1528,10 @@
      with a subsequent conditional jump instruction into a single
      compare-and-branch uop.  */
   m_CORE2,
+
+  /* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag
+     will impact LEA instruction selection. */
+  m_ATOM,
 };
 
 /* Feature tests against the various architecture variations.  */
@@ -1472,10 +1557,11 @@
 };
 
 static const unsigned int x86_accumulate_outgoing_args
-  = m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;
+  = m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
+    | m_GENERIC;
 
 static const unsigned int x86_arch_always_fancy_math_387
-  = m_PENT | m_PPRO | m_AMD_MULTIPLE | m_PENT4
+  = m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4
     | m_NOCONA | m_CORE2 | m_GENERIC;
 
 static enum stringop_alg stringop_alg = no_stringop;
@@ -1953,7 +2039,8 @@
   {&core2_cost, 16, 10, 16, 10, 16},
   {&generic32_cost, 16, 7, 16, 7, 16},
   {&generic64_cost, 16, 10, 16, 10, 16},
-  {&amdfam10_cost, 32, 24, 32, 7, 32}
+  {&amdfam10_cost, 32, 24, 32, 7, 32},
+  {&atom_cost, 16, 7, 16, 7, 16}
 };
 
 static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
@@ -1971,6 +2058,7 @@
   "prescott",
   "nocona",
   "core2",
+  "atom",
   "geode",
   "k6",
   "k6-2",
@@ -2529,6 +2617,9 @@
       {"core2", PROCESSOR_CORE2, CPU_CORE2,
 	PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
 	| PTA_SSSE3 | PTA_CX16},
+      {"atom", PROCESSOR_ATOM, CPU_ATOM,
+	PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
+	| PTA_SSSE3 | PTA_CX16},
       {"geode", PROCESSOR_GEODE, CPU_GEODE,
 	PTA_MMX | PTA_3DNOW | PTA_3DNOW_A |PTA_PREFETCH_SSE},
       {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
@@ -12903,6 +12994,316 @@
     emit_move_insn (operands[0], dst);
 }
 
+#define LEA_SEARCH_THRESHOLD 12
+
+/* Search backward for non-agu definition of register number REGNO1
+   or register number REGNO2 in INSN's basic block until 
+   1. Pass LEA_SEARCH_THRESHOLD instructions, or
+   2. Reach BB boundary, or
+   3. Reach agu definition.
+   Returns the distance between the non-agu definition point and INSN.
+   If no definition point, returns -1.  */
+
+static int
+distance_non_agu_define (unsigned int regno1, unsigned int regno2,
+			 rtx insn)
+{
+  basic_block bb = BLOCK_FOR_INSN (insn);
+  int distance = 0;
+  df_ref *def_rec;
+  enum attr_type insn_type;
+
+  if (insn != BB_HEAD (bb))
+    {
+      rtx prev = PREV_INSN (insn);
+      while (prev && distance < LEA_SEARCH_THRESHOLD)
+	{
+	  if (INSN_P (prev))
+	    {
+	      distance++;
+              for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
+                if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
+                    && !DF_REF_IS_ARTIFICIAL (*def_rec)
+                    && (regno1 == DF_REF_REGNO (*def_rec)
+			|| regno2 == DF_REF_REGNO (*def_rec)))
+		  {
+		    insn_type = get_attr_type (prev);
+		    if (insn_type != TYPE_LEA)
+		      goto done;
+		  }
+	    }
+	  if (prev == BB_HEAD (bb))
+	    break;
+	  prev = PREV_INSN (prev);
+	}
+    }
+  
+  if (distance < LEA_SEARCH_THRESHOLD)
+    {
+      edge e;
+      edge_iterator ei;
+      bool simple_loop = false;
+  
+      FOR_EACH_EDGE (e, ei, bb->preds)
+	if (e->src == bb)
+	  {
+	    simple_loop = true;
+	    break;
+	  }
+  
+      if (simple_loop)
+	{
+	  rtx prev = BB_END (bb);
+	  while (prev
+		 && prev != insn
+		 && distance < LEA_SEARCH_THRESHOLD)
+	    {
+	      if (INSN_P (prev))
+		{
+		  distance++;
+		  for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
+		    if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
+			&& !DF_REF_IS_ARTIFICIAL (*def_rec)
+			&& (regno1 == DF_REF_REGNO (*def_rec)
+			    || regno2 == DF_REF_REGNO (*def_rec)))
+		      {
+			insn_type = get_attr_type (prev);
+			if (insn_type != TYPE_LEA)
+			  goto done;
+		      }
+		}
+	      prev = PREV_INSN (prev);
+	    }
+	}
+    }
+
+  distance = -1;
+
+done:
+  /* get_attr_type may modify recog data.  We want to make sure
+     that recog data is valid for instruction INSN, on which
+     distance_non_agu_define is called.  INSN is unchanged here.  */
+  extract_insn_cached (insn);
+  return distance;
+}
+
+/* Return the distance between INSN and the next insn that uses 
+   register number REGNO0 in memory address.  Return -1 if no such
+   use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set.  */
+
+static int
+distance_agu_use (unsigned int regno0, rtx insn)
+{
+  basic_block bb = BLOCK_FOR_INSN (insn);
+  int distance = 0;
+  df_ref *def_rec;
+  df_ref *use_rec;
+
+  if (insn != BB_END (bb))
+    {
+      rtx next = NEXT_INSN (insn);
+      while (next && distance < LEA_SEARCH_THRESHOLD)
+	{
+	  if (INSN_P (next))
+	    {
+	      distance++;
+
+	      for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
+		if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD
+		     || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE)
+		    && regno0 == DF_REF_REGNO (*use_rec))
+		  {
+		    /* Return DISTANCE if OP0 is used in memory
+		       address in NEXT.  */
+		    return distance;
+		  }
+
+	      for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++)
+		if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
+		    && !DF_REF_IS_ARTIFICIAL (*def_rec)
+		    && regno0 == DF_REF_REGNO (*def_rec))
+		  {
+		    /* Return -1 if OP0 is set in NEXT.  */
+		    return -1;
+		  }
+	    }
+	  if (next == BB_END (bb))
+	    break;
+	  next = NEXT_INSN (next);
+	}
+    }
+
+  if (distance < LEA_SEARCH_THRESHOLD)
+    {
+      edge e;
+      edge_iterator ei;
+      bool simple_loop = false;
+  
+      FOR_EACH_EDGE (e, ei, bb->succs)
+        if (e->dest == bb)
+	  {
+	    simple_loop = true;
+	    break;
+	  }
+  
+      if (simple_loop)
+	{
+	  rtx next = BB_HEAD (bb);
+	  while (next
+		 && next != insn
+		 && distance < LEA_SEARCH_THRESHOLD)
+	    {
+	      if (INSN_P (next))
+		{
+		  distance++;
+
+		  for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
+		    if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD
+			 || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE)
+			&& regno0 == DF_REF_REGNO (*use_rec))
+		      {
+			/* Return DISTANCE if OP0 is used in memory
+			   address in NEXT.  */
+			return distance;
+		      }
+
+		  for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++)
+		    if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
+			&& !DF_REF_IS_ARTIFICIAL (*def_rec)
+			&& regno0 == DF_REF_REGNO (*def_rec))
+		      {
+			/* Return -1 if OP0 is set in NEXT.  */
+			return -1;
+		      }
+
+		}
+	      next = NEXT_INSN (next);
+	    }
+	}
+    }  
+
+  return -1;
+}
+
+/* Define this macro to tune LEA priority vs ADD; it takes effect when
+   there is a dilemma of choosing LEA or ADD.
+   Negative value: ADD is more preferred than LEA
+   Zero: Neutral
+   Positive value: LEA is more preferred than ADD.  */
+#define IX86_LEA_PRIORITY 2
+
+/* Return true if it is ok to optimize an ADD operation to LEA
+   operation to avoid flag register consumption.  For processors
+   like ATOM, if the destination register of LEA holds an actual
+   address which will be used soon, LEA is better and otherwise ADD
+   is better.  */
+
+bool
+ix86_lea_for_add_ok (enum rtx_code code ATTRIBUTE_UNUSED,
+                     rtx insn, rtx operands[])
+{
+  unsigned int regno0 = true_regnum (operands[0]);
+  unsigned int regno1 = true_regnum (operands[1]);
+  unsigned int regno2;
+
+  if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
+    return regno0 != regno1;
+
+  regno2 = true_regnum (operands[2]);
+
+  /* If a = b + c, (a!=b && a!=c), must use lea form. */
+  if (regno0 != regno1 && regno0 != regno2)
+    return true;
+  else    
+    {
+      int dist_define, dist_use;
+      dist_define = distance_non_agu_define (regno1, regno2, insn);
+      if (dist_define <= 0)
+        return true;
+
+      /* If this insn has both backward non-agu dependence and forward
+         agu dependence, the one with the shorter distance takes effect.  */
+      dist_use = distance_agu_use (regno0, insn);
+      if (dist_use <= 0
+	  || (dist_define + IX86_LEA_PRIORITY) < dist_use)
+        return false;
+
+      return true;
+    }
+}
+
+/* Return true if destination reg of SET_BODY is shift count of
+   USE_BODY.  */
+
+static bool
+ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
+{
+  rtx set_dest;
+  rtx shift_rtx;
+  int i;
+
+  /* Retrieve destination of SET_BODY.  */
+  switch (GET_CODE (set_body))
+    {
+    case SET:
+      set_dest = SET_DEST (set_body);
+      if (!set_dest || !REG_P (set_dest))
+	return false;
+      break;
+    case PARALLEL:
+      for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
+	if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
+					  use_body))
+	  return true;
+    default:
+      return false;
+      break;
+    }
+
+  /* Retrieve shift count of USE_BODY.  */
+  switch (GET_CODE (use_body))
+    {
+    case SET:
+      shift_rtx = XEXP (use_body, 1);
+      break;
+    case PARALLEL:
+      for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
+	if (ix86_dep_by_shift_count_body (set_body,
+					  XVECEXP (use_body, 0, i)))
+	  return true;
+    default:
+      return false;
+      break;
+    }
+
+  if (shift_rtx 
+      && (GET_CODE (shift_rtx) == ASHIFT
+	  || GET_CODE (shift_rtx) == LSHIFTRT
+	  || GET_CODE (shift_rtx) == ASHIFTRT
+	  || GET_CODE (shift_rtx) == ROTATE
+	  || GET_CODE (shift_rtx) == ROTATERT))
+    {
+      rtx shift_count = XEXP (shift_rtx, 1);
+
+      /* Return true if shift count is dest of SET_BODY.  */
+      if (REG_P (shift_count)
+	  && true_regnum (set_dest) == true_regnum (shift_count))
+	return true;
+    }
+
+  return false;
+}
+
+/* Return true if destination reg of SET_INSN is shift count of
+   USE_INSN.  */
+
+bool
+ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
+{
+  return ix86_dep_by_shift_count_body (PATTERN (set_insn),
+				       PATTERN (use_insn));
+}
+
 /* Return TRUE or FALSE depending on whether the unary operator meets the
    appropriate constraints.  */
 
@@ -19022,6 +19423,7 @@
   switch (ix86_tune)
     {
     case PROCESSOR_PENTIUM:
+    case PROCESSOR_ATOM:
     case PROCESSOR_K6:
       return 2;
 
@@ -19088,41 +19490,21 @@
   return 1;
 }
 
-/* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
-   address with operands set by DEP_INSN.  */
+/* Return true iff USE_INSN has a memory address with operands set by
+   SET_INSN.  */
 
-static int
-ix86_agi_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
+bool
+ix86_agi_dependent (rtx set_insn, rtx use_insn)
 {
-  rtx addr;
-
-  if (insn_type == TYPE_LEA
-      && TARGET_PENTIUM)
-    {
-      addr = PATTERN (insn);
-
-      if (GET_CODE (addr) == PARALLEL)
-	addr = XVECEXP (addr, 0, 0);
-
-      gcc_assert (GET_CODE (addr) == SET);
-
-      addr = SET_SRC (addr);
-    }
-  else
-    {
-      int i;
-      extract_insn_cached (insn);
-      for (i = recog_data.n_operands - 1; i >= 0; --i)
-	if (MEM_P (recog_data.operand[i]))
-	  {
-	    addr = XEXP (recog_data.operand[i], 0);
-	    goto found;
-	  }
-      return 0;
-    found:;
-    }
-
-  return modified_in_p (addr, dep_insn);
+  int i;
+  extract_insn_cached (use_insn);
+  for (i = recog_data.n_operands - 1; i >= 0; --i)
+    if (MEM_P (recog_data.operand[i]))
+      {
+	rtx addr = XEXP (recog_data.operand[i], 0);
+	return modified_in_p (addr, set_insn) != 0;
+      }
+  return false;
 }
 
 static int
@@ -19150,7 +19532,20 @@
     {
     case PROCESSOR_PENTIUM:
       /* Address Generation Interlock adds a cycle of latency.  */
-      if (ix86_agi_dependent (insn, dep_insn, insn_type))
+      if (insn_type == TYPE_LEA)
+	{
+	  rtx addr = PATTERN (insn);
+
+	  if (GET_CODE (addr) == PARALLEL)
+	    addr = XVECEXP (addr, 0, 0);
+
+	  gcc_assert (GET_CODE (addr) == SET);
+
+	  addr = SET_SRC (addr);
+	  if (modified_in_p (addr, dep_insn))
+	    cost += 1;
+	}
+      else if (ix86_agi_dependent (dep_insn, insn))
 	cost += 1;
 
       /* ??? Compares pair with jump/setcc.  */
@@ -19160,7 +19555,7 @@
       /* Floating point stores require value to be ready one cycle earlier.  */
       if (insn_type == TYPE_FMOV
 	  && get_attr_memory (insn) == MEMORY_STORE
-	  && !ix86_agi_dependent (insn, dep_insn, insn_type))
+	  && !ix86_agi_dependent (dep_insn, insn))
 	cost += 1;
       break;
 
@@ -19183,7 +19578,7 @@
 	 in parallel with previous instruction in case
 	 previous instruction is not needed to compute the address.  */
       if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
-	  && !ix86_agi_dependent (insn, dep_insn, insn_type))
+	  && !ix86_agi_dependent (dep_insn, insn))
 	{
 	  /* Claim moves to take one cycle, as core can issue one load
 	     at time and the next load can start cycle later.  */
@@ -19212,7 +19607,7 @@
 	 in parallel with previous instruction in case
 	 previous instruction is not needed to compute the address.  */
       if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
-	  && !ix86_agi_dependent (insn, dep_insn, insn_type))
+	  && !ix86_agi_dependent (dep_insn, insn))
 	{
 	  /* Claim moves to take one cycle, as core can issue one load
 	     at time and the next load can start cycle later.  */
@@ -19229,6 +19624,7 @@
     case PROCESSOR_ATHLON:
     case PROCESSOR_K8:
     case PROCESSOR_AMDFAM10:
+    case PROCESSOR_ATOM:
     case PROCESSOR_GENERIC32:
     case PROCESSOR_GENERIC64:
       memory = get_attr_memory (insn);
@@ -19237,7 +19633,7 @@
 	 in parallel with previous instruction in case
 	 previous instruction is not needed to compute the address.  */
       if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
-	  && !ix86_agi_dependent (insn, dep_insn, insn_type))
+	  && !ix86_agi_dependent (dep_insn, insn))
 	{
 	  enum attr_unit unit = get_attr_unit (insn);
 	  int loadcost = 3;
